55 files changed, 2471 insertions, 1644 deletions
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 4542651e6acb..5f43697aed30 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
| @@ -371,6 +371,7 @@ struct kvm_vcpu_arch { | |||
| 371 | int last_run_cpu; | 371 | int last_run_cpu; |
| 372 | int vmm_tr_slot; | 372 | int vmm_tr_slot; |
| 373 | int vm_tr_slot; | 373 | int vm_tr_slot; |
| 374 | int sn_rtc_tr_slot; | ||
| 374 | 375 | ||
| 375 | #define KVM_MP_STATE_RUNNABLE 0 | 376 | #define KVM_MP_STATE_RUNNABLE 0 |
| 376 | #define KVM_MP_STATE_UNINITIALIZED 1 | 377 | #define KVM_MP_STATE_UNINITIALIZED 1 |
| @@ -465,6 +466,7 @@ struct kvm_arch { | |||
| 465 | unsigned long vmm_init_rr; | 466 | unsigned long vmm_init_rr; |
| 466 | 467 | ||
| 467 | int online_vcpus; | 468 | int online_vcpus; |
| 469 | int is_sn2; | ||
| 468 | 470 | ||
| 469 | struct kvm_ioapic *vioapic; | 471 | struct kvm_ioapic *vioapic; |
| 470 | struct kvm_vm_stat stat; | 472 | struct kvm_vm_stat stat; |
| @@ -472,6 +474,7 @@ struct kvm_arch { | |||
| 472 | 474 | ||
| 473 | struct list_head assigned_dev_head; | 475 | struct list_head assigned_dev_head; |
| 474 | struct iommu_domain *iommu_domain; | 476 | struct iommu_domain *iommu_domain; |
| 477 | int iommu_flags; | ||
| 475 | struct hlist_head irq_ack_notifier_list; | 478 | struct hlist_head irq_ack_notifier_list; |
| 476 | 479 | ||
| 477 | unsigned long irq_sources_bitmap; | 480 | unsigned long irq_sources_bitmap; |
| @@ -578,6 +581,8 @@ struct kvm_vmm_info{ | |||
| 578 | kvm_vmm_entry *vmm_entry; | 581 | kvm_vmm_entry *vmm_entry; |
| 579 | kvm_tramp_entry *tramp_entry; | 582 | kvm_tramp_entry *tramp_entry; |
| 580 | unsigned long vmm_ivt; | 583 | unsigned long vmm_ivt; |
| 584 | unsigned long patch_mov_ar; | ||
| 585 | unsigned long patch_mov_ar_sn2; | ||
| 581 | }; | 586 | }; |
| 582 | 587 | ||
| 583 | int kvm_highest_pending_irq(struct kvm_vcpu *vcpu); | 588 | int kvm_highest_pending_irq(struct kvm_vcpu *vcpu); |
| @@ -585,7 +590,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu); | |||
| 585 | int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); | 590 | int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); |
| 586 | void kvm_sal_emul(struct kvm_vcpu *vcpu); | 591 | void kvm_sal_emul(struct kvm_vcpu *vcpu); |
| 587 | 592 | ||
| 588 | static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {} | ||
| 589 | #endif /* __ASSEMBLY__*/ | 593 | #endif /* __ASSEMBLY__*/ |
| 590 | 594 | ||
| 591 | #endif | 595 | #endif |
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 7a9bff47564f..0a9cc73d35c7 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
| @@ -146,6 +146,8 @@ | |||
| 146 | #define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX) | 146 | #define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX) |
| 147 | #define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX) | 147 | #define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX) |
| 148 | #define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX) | 148 | #define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX) |
| 149 | #define PAGE_KERNEL_UC __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX | \ | ||
| 150 | _PAGE_MA_UC) | ||
| 149 | 151 | ||
| 150 | # ifndef __ASSEMBLY__ | 152 | # ifndef __ASSEMBLY__ |
| 151 | 153 | ||
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index acc4d19ae62a..b448197728be 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
| @@ -610,6 +610,9 @@ static struct irqaction ipi_irqaction = { | |||
| 610 | .name = "IPI" | 610 | .name = "IPI" |
| 611 | }; | 611 | }; |
| 612 | 612 | ||
| 613 | /* | ||
| 614 | * KVM uses this interrupt to force a cpu out of guest mode | ||
| 615 | */ | ||
| 613 | static struct irqaction resched_irqaction = { | 616 | static struct irqaction resched_irqaction = { |
| 614 | .handler = dummy_handler, | 617 | .handler = dummy_handler, |
| 615 | .flags = IRQF_DISABLED, | 618 | .flags = IRQF_DISABLED, |
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 0a2d6b86075a..64d520937874 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
| @@ -23,7 +23,7 @@ if VIRTUALIZATION | |||
| 23 | 23 | ||
| 24 | config KVM | 24 | config KVM |
| 25 | tristate "Kernel-based Virtual Machine (KVM) support" | 25 | tristate "Kernel-based Virtual Machine (KVM) support" |
| 26 | depends on HAVE_KVM && EXPERIMENTAL | 26 | depends on HAVE_KVM && MODULES && EXPERIMENTAL |
| 27 | # for device assignment: | 27 | # for device assignment: |
| 28 | depends on PCI | 28 | depends on PCI |
| 29 | select PREEMPT_NOTIFIERS | 29 | select PREEMPT_NOTIFIERS |
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index d20a5db4c4dd..80c57b0a21c4 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
| @@ -41,6 +41,9 @@ | |||
| 41 | #include <asm/div64.h> | 41 | #include <asm/div64.h> |
| 42 | #include <asm/tlb.h> | 42 | #include <asm/tlb.h> |
| 43 | #include <asm/elf.h> | 43 | #include <asm/elf.h> |
| 44 | #include <asm/sn/addrs.h> | ||
| 45 | #include <asm/sn/clksupport.h> | ||
| 46 | #include <asm/sn/shub_mmr.h> | ||
| 44 | 47 | ||
| 45 | #include "misc.h" | 48 | #include "misc.h" |
| 46 | #include "vti.h" | 49 | #include "vti.h" |
| @@ -65,6 +68,16 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
| 65 | { NULL } | 68 | { NULL } |
| 66 | }; | 69 | }; |
| 67 | 70 | ||
| 71 | static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu) | ||
| 72 | { | ||
| 73 | #if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) | ||
| 74 | if (vcpu->kvm->arch.is_sn2) | ||
| 75 | return rtc_time(); | ||
| 76 | else | ||
| 77 | #endif | ||
| 78 | return ia64_getreg(_IA64_REG_AR_ITC); | ||
| 79 | } | ||
| 80 | |||
| 68 | static void kvm_flush_icache(unsigned long start, unsigned long len) | 81 | static void kvm_flush_icache(unsigned long start, unsigned long len) |
| 69 | { | 82 | { |
| 70 | int l; | 83 | int l; |
| @@ -119,8 +132,7 @@ void kvm_arch_hardware_enable(void *garbage) | |||
| 119 | unsigned long saved_psr; | 132 | unsigned long saved_psr; |
| 120 | int slot; | 133 | int slot; |
| 121 | 134 | ||
| 122 | pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), | 135 | pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); |
| 123 | PAGE_KERNEL)); | ||
| 124 | local_irq_save(saved_psr); | 136 | local_irq_save(saved_psr); |
| 125 | slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); | 137 | slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); |
| 126 | local_irq_restore(saved_psr); | 138 | local_irq_restore(saved_psr); |
| @@ -283,6 +295,18 @@ static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 283 | 295 | ||
| 284 | } | 296 | } |
| 285 | 297 | ||
| 298 | static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector) | ||
| 299 | { | ||
| 300 | struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); | ||
| 301 | |||
| 302 | if (!test_and_set_bit(vector, &vpd->irr[0])) { | ||
| 303 | vcpu->arch.irq_new_pending = 1; | ||
| 304 | kvm_vcpu_kick(vcpu); | ||
| 305 | return 1; | ||
| 306 | } | ||
| 307 | return 0; | ||
| 308 | } | ||
| 309 | |||
| 286 | /* | 310 | /* |
| 287 | * offset: address offset to IPI space. | 311 | * offset: address offset to IPI space. |
| 288 | * value: deliver value. | 312 | * value: deliver value. |
| @@ -292,20 +316,20 @@ static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm, | |||
| 292 | { | 316 | { |
| 293 | switch (dm) { | 317 | switch (dm) { |
| 294 | case SAPIC_FIXED: | 318 | case SAPIC_FIXED: |
| 295 | kvm_apic_set_irq(vcpu, vector, 0); | ||
| 296 | break; | 319 | break; |
| 297 | case SAPIC_NMI: | 320 | case SAPIC_NMI: |
| 298 | kvm_apic_set_irq(vcpu, 2, 0); | 321 | vector = 2; |
| 299 | break; | 322 | break; |
| 300 | case SAPIC_EXTINT: | 323 | case SAPIC_EXTINT: |
| 301 | kvm_apic_set_irq(vcpu, 0, 0); | 324 | vector = 0; |
| 302 | break; | 325 | break; |
| 303 | case SAPIC_INIT: | 326 | case SAPIC_INIT: |
| 304 | case SAPIC_PMI: | 327 | case SAPIC_PMI: |
| 305 | default: | 328 | default: |
| 306 | printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n"); | 329 | printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n"); |
| 307 | break; | 330 | return; |
| 308 | } | 331 | } |
| 332 | __apic_accept_irq(vcpu, vector); | ||
| 309 | } | 333 | } |
| 310 | 334 | ||
| 311 | static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, | 335 | static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, |
| @@ -413,6 +437,23 @@ static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 413 | return 1; | 437 | return 1; |
| 414 | } | 438 | } |
| 415 | 439 | ||
| 440 | static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu) | ||
| 441 | { | ||
| 442 | unsigned long pte, rtc_phys_addr, map_addr; | ||
| 443 | int slot; | ||
| 444 | |||
| 445 | map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT); | ||
| 446 | rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC; | ||
| 447 | pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC)); | ||
| 448 | slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT); | ||
| 449 | vcpu->arch.sn_rtc_tr_slot = slot; | ||
| 450 | if (slot < 0) { | ||
| 451 | printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n"); | ||
| 452 | slot = 0; | ||
| 453 | } | ||
| 454 | return slot; | ||
| 455 | } | ||
| 456 | |||
| 416 | int kvm_emulate_halt(struct kvm_vcpu *vcpu) | 457 | int kvm_emulate_halt(struct kvm_vcpu *vcpu) |
| 417 | { | 458 | { |
| 418 | 459 | ||
| @@ -426,7 +467,7 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) | |||
| 426 | 467 | ||
| 427 | if (irqchip_in_kernel(vcpu->kvm)) { | 468 | if (irqchip_in_kernel(vcpu->kvm)) { |
| 428 | 469 | ||
| 429 | vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset; | 470 | vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset; |
| 430 | 471 | ||
| 431 | if (time_after(vcpu_now_itc, vpd->itm)) { | 472 | if (time_after(vcpu_now_itc, vpd->itm)) { |
| 432 | vcpu->arch.timer_check = 1; | 473 | vcpu->arch.timer_check = 1; |
| @@ -447,10 +488,10 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) | |||
| 447 | hrtimer_cancel(p_ht); | 488 | hrtimer_cancel(p_ht); |
| 448 | vcpu->arch.ht_active = 0; | 489 | vcpu->arch.ht_active = 0; |
| 449 | 490 | ||
| 450 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) | 491 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) || |
| 492 | kvm_cpu_has_pending_timer(vcpu)) | ||
| 451 | if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) | 493 | if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) |
| 452 | vcpu->arch.mp_state = | 494 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
| 453 | KVM_MP_STATE_RUNNABLE; | ||
| 454 | 495 | ||
| 455 | if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) | 496 | if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) |
| 456 | return -EINTR; | 497 | return -EINTR; |
| @@ -551,22 +592,35 @@ static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu) | |||
| 551 | if (r < 0) | 592 | if (r < 0) |
| 552 | goto out; | 593 | goto out; |
| 553 | vcpu->arch.vm_tr_slot = r; | 594 | vcpu->arch.vm_tr_slot = r; |
| 595 | |||
| 596 | #if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) | ||
| 597 | if (kvm->arch.is_sn2) { | ||
| 598 | r = kvm_sn2_setup_mappings(vcpu); | ||
| 599 | if (r < 0) | ||
| 600 | goto out; | ||
| 601 | } | ||
| 602 | #endif | ||
| 603 | |||
| 554 | r = 0; | 604 | r = 0; |
| 555 | out: | 605 | out: |
| 556 | return r; | 606 | return r; |
| 557 | |||
| 558 | } | 607 | } |
| 559 | 608 | ||
| 560 | static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu) | 609 | static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu) |
| 561 | { | 610 | { |
| 562 | 611 | struct kvm *kvm = vcpu->kvm; | |
| 563 | ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot); | 612 | ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot); |
| 564 | ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot); | 613 | ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot); |
| 565 | 614 | #if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) | |
| 615 | if (kvm->arch.is_sn2) | ||
| 616 | ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot); | ||
| 617 | #endif | ||
| 566 | } | 618 | } |
| 567 | 619 | ||
| 568 | static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu) | 620 | static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu) |
| 569 | { | 621 | { |
| 622 | unsigned long psr; | ||
| 623 | int r; | ||
| 570 | int cpu = smp_processor_id(); | 624 | int cpu = smp_processor_id(); |
| 571 | 625 | ||
| 572 | if (vcpu->arch.last_run_cpu != cpu || | 626 | if (vcpu->arch.last_run_cpu != cpu || |
| @@ -578,36 +632,27 @@ static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu) | |||
| 578 | 632 | ||
| 579 | vcpu->arch.host_rr6 = ia64_get_rr(RR6); | 633 | vcpu->arch.host_rr6 = ia64_get_rr(RR6); |
| 580 | vti_set_rr6(vcpu->arch.vmm_rr); | 634 | vti_set_rr6(vcpu->arch.vmm_rr); |
| 581 | return kvm_insert_vmm_mapping(vcpu); | 635 | local_irq_save(psr); |
| 636 | r = kvm_insert_vmm_mapping(vcpu); | ||
| 637 | local_irq_restore(psr); | ||
| 638 | return r; | ||
| 582 | } | 639 | } |
| 640 | |||
| 583 | static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu) | 641 | static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu) |
| 584 | { | 642 | { |
| 585 | kvm_purge_vmm_mapping(vcpu); | 643 | kvm_purge_vmm_mapping(vcpu); |
| 586 | vti_set_rr6(vcpu->arch.host_rr6); | 644 | vti_set_rr6(vcpu->arch.host_rr6); |
| 587 | } | 645 | } |
| 588 | 646 | ||
| 589 | static int vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 647 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 590 | { | 648 | { |
| 591 | union context *host_ctx, *guest_ctx; | 649 | union context *host_ctx, *guest_ctx; |
| 592 | int r; | 650 | int r; |
| 593 | 651 | ||
| 594 | /*Get host and guest context with guest address space.*/ | 652 | /* |
| 595 | host_ctx = kvm_get_host_context(vcpu); | 653 | * down_read() may sleep and return with interrupts enabled |
| 596 | guest_ctx = kvm_get_guest_context(vcpu); | 654 | */ |
| 597 | 655 | down_read(&vcpu->kvm->slots_lock); | |
| 598 | r = kvm_vcpu_pre_transition(vcpu); | ||
| 599 | if (r < 0) | ||
| 600 | goto out; | ||
| 601 | kvm_vmm_info->tramp_entry(host_ctx, guest_ctx); | ||
| 602 | kvm_vcpu_post_transition(vcpu); | ||
| 603 | r = 0; | ||
| 604 | out: | ||
| 605 | return r; | ||
| 606 | } | ||
| 607 | |||
| 608 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
| 609 | { | ||
| 610 | int r; | ||
| 611 | 656 | ||
| 612 | again: | 657 | again: |
| 613 | if (signal_pending(current)) { | 658 | if (signal_pending(current)) { |
| @@ -616,26 +661,31 @@ again: | |||
| 616 | goto out; | 661 | goto out; |
| 617 | } | 662 | } |
| 618 | 663 | ||
| 619 | /* | ||
| 620 | * down_read() may sleep and return with interrupts enabled | ||
| 621 | */ | ||
| 622 | down_read(&vcpu->kvm->slots_lock); | ||
| 623 | |||
| 624 | preempt_disable(); | 664 | preempt_disable(); |
| 625 | local_irq_disable(); | 665 | local_irq_disable(); |
| 626 | 666 | ||
| 627 | vcpu->guest_mode = 1; | 667 | /*Get host and guest context with guest address space.*/ |
| 668 | host_ctx = kvm_get_host_context(vcpu); | ||
| 669 | guest_ctx = kvm_get_guest_context(vcpu); | ||
| 670 | |||
| 671 | clear_bit(KVM_REQ_KICK, &vcpu->requests); | ||
| 672 | |||
| 673 | r = kvm_vcpu_pre_transition(vcpu); | ||
| 674 | if (r < 0) | ||
| 675 | goto vcpu_run_fail; | ||
| 676 | |||
| 677 | up_read(&vcpu->kvm->slots_lock); | ||
| 628 | kvm_guest_enter(); | 678 | kvm_guest_enter(); |
| 629 | r = vti_vcpu_run(vcpu, kvm_run); | 679 | |
| 630 | if (r < 0) { | 680 | /* |
| 631 | local_irq_enable(); | 681 | * Transition to the guest |
| 632 | preempt_enable(); | 682 | */ |
| 633 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 683 | kvm_vmm_info->tramp_entry(host_ctx, guest_ctx); |
| 634 | goto out; | 684 | |
| 635 | } | 685 | kvm_vcpu_post_transition(vcpu); |
| 636 | 686 | ||
| 637 | vcpu->arch.launched = 1; | 687 | vcpu->arch.launched = 1; |
| 638 | vcpu->guest_mode = 0; | 688 | set_bit(KVM_REQ_KICK, &vcpu->requests); |
| 639 | local_irq_enable(); | 689 | local_irq_enable(); |
| 640 | 690 | ||
| 641 | /* | 691 | /* |
| @@ -646,9 +696,10 @@ again: | |||
| 646 | */ | 696 | */ |
| 647 | barrier(); | 697 | barrier(); |
| 648 | kvm_guest_exit(); | 698 | kvm_guest_exit(); |
| 649 | up_read(&vcpu->kvm->slots_lock); | ||
| 650 | preempt_enable(); | 699 | preempt_enable(); |
| 651 | 700 | ||
| 701 | down_read(&vcpu->kvm->slots_lock); | ||
| 702 | |||
| 652 | r = kvm_handle_exit(kvm_run, vcpu); | 703 | r = kvm_handle_exit(kvm_run, vcpu); |
| 653 | 704 | ||
| 654 | if (r > 0) { | 705 | if (r > 0) { |
| @@ -657,12 +708,20 @@ again: | |||
| 657 | } | 708 | } |
| 658 | 709 | ||
| 659 | out: | 710 | out: |
| 711 | up_read(&vcpu->kvm->slots_lock); | ||
| 660 | if (r > 0) { | 712 | if (r > 0) { |
| 661 | kvm_resched(vcpu); | 713 | kvm_resched(vcpu); |
| 714 | down_read(&vcpu->kvm->slots_lock); | ||
| 662 | goto again; | 715 | goto again; |
| 663 | } | 716 | } |
| 664 | 717 | ||
| 665 | return r; | 718 | return r; |
| 719 | |||
| 720 | vcpu_run_fail: | ||
| 721 | local_irq_enable(); | ||
| 722 | preempt_enable(); | ||
| 723 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | ||
| 724 | goto out; | ||
| 666 | } | 725 | } |
| 667 | 726 | ||
| 668 | static void kvm_set_mmio_data(struct kvm_vcpu *vcpu) | 727 | static void kvm_set_mmio_data(struct kvm_vcpu *vcpu) |
| @@ -788,6 +847,9 @@ struct kvm *kvm_arch_create_vm(void) | |||
| 788 | 847 | ||
| 789 | if (IS_ERR(kvm)) | 848 | if (IS_ERR(kvm)) |
| 790 | return ERR_PTR(-ENOMEM); | 849 | return ERR_PTR(-ENOMEM); |
| 850 | |||
| 851 | kvm->arch.is_sn2 = ia64_platform_is("sn2"); | ||
| 852 | |||
| 791 | kvm_init_vm(kvm); | 853 | kvm_init_vm(kvm); |
| 792 | 854 | ||
| 793 | kvm->arch.online_vcpus = 0; | 855 | kvm->arch.online_vcpus = 0; |
| @@ -884,7 +946,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 884 | RESTORE_REGS(saved_gp); | 946 | RESTORE_REGS(saved_gp); |
| 885 | 947 | ||
| 886 | vcpu->arch.irq_new_pending = 1; | 948 | vcpu->arch.irq_new_pending = 1; |
| 887 | vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC); | 949 | vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu); |
| 888 | set_bit(KVM_REQ_RESUME, &vcpu->requests); | 950 | set_bit(KVM_REQ_RESUME, &vcpu->requests); |
| 889 | 951 | ||
| 890 | vcpu_put(vcpu); | 952 | vcpu_put(vcpu); |
| @@ -1043,10 +1105,6 @@ static void kvm_free_vmm_area(void) | |||
| 1043 | } | 1105 | } |
| 1044 | } | 1106 | } |
| 1045 | 1107 | ||
| 1046 | static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | ||
| 1047 | { | ||
| 1048 | } | ||
| 1049 | |||
| 1050 | static int vti_init_vpd(struct kvm_vcpu *vcpu) | 1108 | static int vti_init_vpd(struct kvm_vcpu *vcpu) |
| 1051 | { | 1109 | { |
| 1052 | int i; | 1110 | int i; |
| @@ -1165,7 +1223,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
| 1165 | regs->cr_iip = PALE_RESET_ENTRY; | 1223 | regs->cr_iip = PALE_RESET_ENTRY; |
| 1166 | 1224 | ||
| 1167 | /*Initialize itc offset for vcpus*/ | 1225 | /*Initialize itc offset for vcpus*/ |
| 1168 | itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC); | 1226 | itc_offset = 0UL - kvm_get_itc(vcpu); |
| 1169 | for (i = 0; i < kvm->arch.online_vcpus; i++) { | 1227 | for (i = 0; i < kvm->arch.online_vcpus; i++) { |
| 1170 | v = (struct kvm_vcpu *)((char *)vcpu + | 1228 | v = (struct kvm_vcpu *)((char *)vcpu + |
| 1171 | sizeof(struct kvm_vcpu_data) * i); | 1229 | sizeof(struct kvm_vcpu_data) * i); |
| @@ -1237,6 +1295,7 @@ static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) | |||
| 1237 | 1295 | ||
| 1238 | local_irq_save(psr); | 1296 | local_irq_save(psr); |
| 1239 | r = kvm_insert_vmm_mapping(vcpu); | 1297 | r = kvm_insert_vmm_mapping(vcpu); |
| 1298 | local_irq_restore(psr); | ||
| 1240 | if (r) | 1299 | if (r) |
| 1241 | goto fail; | 1300 | goto fail; |
| 1242 | r = kvm_vcpu_init(vcpu, vcpu->kvm, id); | 1301 | r = kvm_vcpu_init(vcpu, vcpu->kvm, id); |
| @@ -1254,13 +1313,11 @@ static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) | |||
| 1254 | goto uninit; | 1313 | goto uninit; |
| 1255 | 1314 | ||
| 1256 | kvm_purge_vmm_mapping(vcpu); | 1315 | kvm_purge_vmm_mapping(vcpu); |
| 1257 | local_irq_restore(psr); | ||
| 1258 | 1316 | ||
| 1259 | return 0; | 1317 | return 0; |
| 1260 | uninit: | 1318 | uninit: |
| 1261 | kvm_vcpu_uninit(vcpu); | 1319 | kvm_vcpu_uninit(vcpu); |
| 1262 | fail: | 1320 | fail: |
| 1263 | local_irq_restore(psr); | ||
| 1264 | return r; | 1321 | return r; |
| 1265 | } | 1322 | } |
| 1266 | 1323 | ||
| @@ -1291,7 +1348,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | |||
| 1291 | vcpu->kvm = kvm; | 1348 | vcpu->kvm = kvm; |
| 1292 | 1349 | ||
| 1293 | cpu = get_cpu(); | 1350 | cpu = get_cpu(); |
| 1294 | vti_vcpu_load(vcpu, cpu); | ||
| 1295 | r = vti_vcpu_setup(vcpu, id); | 1351 | r = vti_vcpu_setup(vcpu, id); |
| 1296 | put_cpu(); | 1352 | put_cpu(); |
| 1297 | 1353 | ||
| @@ -1427,7 +1483,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 1427 | } | 1483 | } |
| 1428 | for (i = 0; i < 4; i++) | 1484 | for (i = 0; i < 4; i++) |
| 1429 | regs->insvc[i] = vcpu->arch.insvc[i]; | 1485 | regs->insvc[i] = vcpu->arch.insvc[i]; |
| 1430 | regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC); | 1486 | regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu); |
| 1431 | SAVE_REGS(xtp); | 1487 | SAVE_REGS(xtp); |
| 1432 | SAVE_REGS(metaphysical_rr0); | 1488 | SAVE_REGS(metaphysical_rr0); |
| 1433 | SAVE_REGS(metaphysical_rr4); | 1489 | SAVE_REGS(metaphysical_rr4); |
| @@ -1574,6 +1630,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
| 1574 | 1630 | ||
| 1575 | void kvm_arch_flush_shadow(struct kvm *kvm) | 1631 | void kvm_arch_flush_shadow(struct kvm *kvm) |
| 1576 | { | 1632 | { |
| 1633 | kvm_flush_remote_tlbs(kvm); | ||
| 1577 | } | 1634 | } |
| 1578 | 1635 | ||
| 1579 | long kvm_arch_dev_ioctl(struct file *filp, | 1636 | long kvm_arch_dev_ioctl(struct file *filp, |
| @@ -1616,8 +1673,37 @@ out: | |||
| 1616 | return 0; | 1673 | return 0; |
| 1617 | } | 1674 | } |
| 1618 | 1675 | ||
| 1676 | |||
| 1677 | /* | ||
| 1678 | * On SN2, the ITC isn't stable, so copy in fast path code to use the | ||
| 1679 | * SN2 RTC, replacing the ITC based default verion. | ||
| 1680 | */ | ||
| 1681 | static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info, | ||
| 1682 | struct module *module) | ||
| 1683 | { | ||
| 1684 | unsigned long new_ar, new_ar_sn2; | ||
| 1685 | unsigned long module_base; | ||
| 1686 | |||
| 1687 | if (!ia64_platform_is("sn2")) | ||
| 1688 | return; | ||
| 1689 | |||
| 1690 | module_base = (unsigned long)module->module_core; | ||
| 1691 | |||
| 1692 | new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base; | ||
| 1693 | new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base; | ||
| 1694 | |||
| 1695 | printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC " | ||
| 1696 | "as source\n"); | ||
| 1697 | |||
| 1698 | /* | ||
| 1699 | * Copy the SN2 version of mov_ar into place. They are both | ||
| 1700 | * the same size, so 6 bundles is sufficient (6 * 0x10). | ||
| 1701 | */ | ||
| 1702 | memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60); | ||
| 1703 | } | ||
| 1704 | |||
| 1619 | static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info, | 1705 | static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info, |
| 1620 | struct module *module) | 1706 | struct module *module) |
| 1621 | { | 1707 | { |
| 1622 | unsigned long module_base; | 1708 | unsigned long module_base; |
| 1623 | unsigned long vmm_size; | 1709 | unsigned long vmm_size; |
| @@ -1639,6 +1725,7 @@ static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info, | |||
| 1639 | return -EFAULT; | 1725 | return -EFAULT; |
| 1640 | 1726 | ||
| 1641 | memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size); | 1727 | memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size); |
| 1728 | kvm_patch_vmm(vmm_info, module); | ||
| 1642 | kvm_flush_icache(kvm_vmm_base, vmm_size); | 1729 | kvm_flush_icache(kvm_vmm_base, vmm_size); |
| 1643 | 1730 | ||
| 1644 | /*Recalculate kvm_vmm_info based on new VMM*/ | 1731 | /*Recalculate kvm_vmm_info based on new VMM*/ |
| @@ -1792,38 +1879,24 @@ void kvm_arch_hardware_unsetup(void) | |||
| 1792 | { | 1879 | { |
| 1793 | } | 1880 | } |
| 1794 | 1881 | ||
| 1795 | static void vcpu_kick_intr(void *info) | ||
| 1796 | { | ||
| 1797 | #ifdef DEBUG | ||
| 1798 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; | ||
| 1799 | printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu); | ||
| 1800 | #endif | ||
| 1801 | } | ||
| 1802 | |||
| 1803 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | 1882 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu) |
| 1804 | { | 1883 | { |
| 1805 | int ipi_pcpu = vcpu->cpu; | 1884 | int me; |
| 1806 | int cpu = get_cpu(); | 1885 | int cpu = vcpu->cpu; |
| 1807 | 1886 | ||
| 1808 | if (waitqueue_active(&vcpu->wq)) | 1887 | if (waitqueue_active(&vcpu->wq)) |
| 1809 | wake_up_interruptible(&vcpu->wq); | 1888 | wake_up_interruptible(&vcpu->wq); |
| 1810 | 1889 | ||
| 1811 | if (vcpu->guest_mode && cpu != ipi_pcpu) | 1890 | me = get_cpu(); |
| 1812 | smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0); | 1891 | if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu)) |
| 1892 | if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) | ||
| 1893 | smp_send_reschedule(cpu); | ||
| 1813 | put_cpu(); | 1894 | put_cpu(); |
| 1814 | } | 1895 | } |
| 1815 | 1896 | ||
| 1816 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig) | 1897 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) |
| 1817 | { | 1898 | { |
| 1818 | 1899 | return __apic_accept_irq(vcpu, irq->vector); | |
| 1819 | struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); | ||
| 1820 | |||
| 1821 | if (!test_and_set_bit(vec, &vpd->irr[0])) { | ||
| 1822 | vcpu->arch.irq_new_pending = 1; | ||
| 1823 | kvm_vcpu_kick(vcpu); | ||
| 1824 | return 1; | ||
| 1825 | } | ||
| 1826 | return 0; | ||
| 1827 | } | 1900 | } |
| 1828 | 1901 | ||
| 1829 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) | 1902 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) |
| @@ -1836,20 +1909,18 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) | |||
| 1836 | return 0; | 1909 | return 0; |
| 1837 | } | 1910 | } |
| 1838 | 1911 | ||
| 1839 | struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, | 1912 | int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) |
| 1840 | unsigned long bitmap) | ||
| 1841 | { | 1913 | { |
| 1842 | struct kvm_vcpu *lvcpu = kvm->vcpus[0]; | 1914 | return vcpu1->arch.xtp - vcpu2->arch.xtp; |
| 1843 | int i; | 1915 | } |
| 1844 | |||
| 1845 | for (i = 1; i < kvm->arch.online_vcpus; i++) { | ||
| 1846 | if (!kvm->vcpus[i]) | ||
| 1847 | continue; | ||
| 1848 | if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp) | ||
| 1849 | lvcpu = kvm->vcpus[i]; | ||
| 1850 | } | ||
| 1851 | 1916 | ||
| 1852 | return lvcpu; | 1917 | int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, |
| 1918 | int short_hand, int dest, int dest_mode) | ||
| 1919 | { | ||
| 1920 | struct kvm_lapic *target = vcpu->arch.apic; | ||
| 1921 | return (dest_mode == 0) ? | ||
| 1922 | kvm_apic_match_physical_addr(target, dest) : | ||
| 1923 | kvm_apic_match_logical_addr(target, dest); | ||
| 1853 | } | 1924 | } |
| 1854 | 1925 | ||
| 1855 | static int find_highest_bits(int *dat) | 1926 | static int find_highest_bits(int *dat) |
| @@ -1888,6 +1959,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) | |||
| 1888 | return 0; | 1959 | return 0; |
| 1889 | } | 1960 | } |
| 1890 | 1961 | ||
| 1962 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) | ||
| 1963 | { | ||
| 1964 | /* do real check here */ | ||
| 1965 | return 1; | ||
| 1966 | } | ||
| 1967 | |||
| 1891 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | 1968 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) |
| 1892 | { | 1969 | { |
| 1893 | return vcpu->arch.timer_fired; | 1970 | return vcpu->arch.timer_fired; |
| @@ -1918,6 +1995,7 @@ static int vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 1918 | long psr; | 1995 | long psr; |
| 1919 | local_irq_save(psr); | 1996 | local_irq_save(psr); |
| 1920 | r = kvm_insert_vmm_mapping(vcpu); | 1997 | r = kvm_insert_vmm_mapping(vcpu); |
| 1998 | local_irq_restore(psr); | ||
| 1921 | if (r) | 1999 | if (r) |
| 1922 | goto fail; | 2000 | goto fail; |
| 1923 | 2001 | ||
| @@ -1930,7 +2008,6 @@ static int vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 1930 | kvm_purge_vmm_mapping(vcpu); | 2008 | kvm_purge_vmm_mapping(vcpu); |
| 1931 | r = 0; | 2009 | r = 0; |
| 1932 | fail: | 2010 | fail: |
| 1933 | local_irq_restore(psr); | ||
| 1934 | return r; | 2011 | return r; |
| 1935 | } | 2012 | } |
| 1936 | 2013 | ||
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
index a8ae52ed5635..e4b82319881d 100644
--- a/arch/ia64/kvm/kvm_fw.c
+++ b/arch/ia64/kvm/kvm_fw.c
| @@ -21,6 +21,9 @@ | |||
| 21 | 21 | ||
| 22 | #include <linux/kvm_host.h> | 22 | #include <linux/kvm_host.h> |
| 23 | #include <linux/smp.h> | 23 | #include <linux/smp.h> |
| 24 | #include <asm/sn/addrs.h> | ||
| 25 | #include <asm/sn/clksupport.h> | ||
| 26 | #include <asm/sn/shub_mmr.h> | ||
| 24 | 27 | ||
| 25 | #include "vti.h" | 28 | #include "vti.h" |
| 26 | #include "misc.h" | 29 | #include "misc.h" |
| @@ -188,12 +191,35 @@ static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu) | |||
| 188 | return result; | 191 | return result; |
| 189 | } | 192 | } |
| 190 | 193 | ||
| 191 | static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu) | 194 | /* |
| 195 | * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2 | ||
| 196 | * RTC is used instead. This function patches the ratios from SAL | ||
| 197 | * to match the RTC before providing them to the guest. | ||
| 198 | */ | ||
| 199 | static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result) | ||
| 192 | { | 200 | { |
| 201 | struct pal_freq_ratio *ratio; | ||
| 202 | unsigned long sal_freq, sal_drift, factor; | ||
| 203 | |||
| 204 | result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, | ||
| 205 | &sal_freq, &sal_drift); | ||
| 206 | ratio = (struct pal_freq_ratio *)&result->v2; | ||
| 207 | factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) / | ||
| 208 | sn_rtc_cycles_per_second; | ||
| 209 | |||
| 210 | ratio->num = 3; | ||
| 211 | ratio->den = factor; | ||
| 212 | } | ||
| 193 | 213 | ||
| 214 | static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu) | ||
| 215 | { | ||
| 194 | struct ia64_pal_retval result; | 216 | struct ia64_pal_retval result; |
| 195 | 217 | ||
| 196 | PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0); | 218 | PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0); |
| 219 | |||
| 220 | if (vcpu->kvm->arch.is_sn2) | ||
| 221 | sn2_patch_itc_freq_ratios(&result); | ||
| 222 | |||
| 197 | return result; | 223 | return result; |
| 198 | } | 224 | } |
| 199 | 225 | ||
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
index 6d6cbcb14893..ee541cebcd78 100644
--- a/arch/ia64/kvm/lapic.h
+++ b/arch/ia64/kvm/lapic.h
| @@ -20,6 +20,10 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu); | |||
| 20 | 20 | ||
| 21 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); | 21 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); |
| 22 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); | 22 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); |
| 23 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig); | 23 | int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, |
| 24 | int short_hand, int dest, int dest_mode); | ||
| 25 | int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); | ||
| 26 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); | ||
| 27 | #define kvm_apic_present(x) (true) | ||
| 24 | 28 | ||
| 25 | #endif | 29 | #endif |
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
index 32254ce9a1bd..f793be3effff 100644
--- a/arch/ia64/kvm/optvfault.S
+++ b/arch/ia64/kvm/optvfault.S
| @@ -11,6 +11,7 @@ | |||
| 11 | 11 | ||
| 12 | #include <asm/asmmacro.h> | 12 | #include <asm/asmmacro.h> |
| 13 | #include <asm/processor.h> | 13 | #include <asm/processor.h> |
| 14 | #include <asm/kvm_host.h> | ||
| 14 | 15 | ||
| 15 | #include "vti.h" | 16 | #include "vti.h" |
| 16 | #include "asm-offsets.h" | 17 | #include "asm-offsets.h" |
| @@ -140,6 +141,35 @@ GLOBAL_ENTRY(kvm_asm_mov_from_ar) | |||
| 140 | ;; | 141 | ;; |
| 141 | END(kvm_asm_mov_from_ar) | 142 | END(kvm_asm_mov_from_ar) |
| 142 | 143 | ||
| 144 | /* | ||
| 145 | * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC | ||
| 146 | * clock as it's source for emulating the ITC. This version will be | ||
| 147 | * copied on top of the original version if the host is determined to | ||
| 148 | * be an SN2. | ||
| 149 | */ | ||
| 150 | GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2) | ||
| 151 | add r18=VMM_VCPU_ITC_OFS_OFFSET, r21 | ||
| 152 | movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT)) | ||
| 153 | |||
| 154 | add r16=VMM_VCPU_LAST_ITC_OFFSET,r21 | ||
| 155 | extr.u r17=r25,6,7 | ||
| 156 | mov r24=b0 | ||
| 157 | ;; | ||
| 158 | ld8 r18=[r18] | ||
| 159 | ld8 r19=[r19] | ||
| 160 | addl r20=@gprel(asm_mov_to_reg),gp | ||
| 161 | ;; | ||
| 162 | add r19=r19,r18 | ||
| 163 | shladd r17=r17,4,r20 | ||
| 164 | ;; | ||
| 165 | adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20 | ||
| 166 | st8 [r16] = r19 | ||
| 167 | mov b0=r17 | ||
| 168 | br.sptk.few b0 | ||
| 169 | ;; | ||
| 170 | END(kvm_asm_mov_from_ar_sn2) | ||
| 171 | |||
| 172 | |||
| 143 | 173 | ||
| 144 | // mov r1=rr[r3] | 174 | // mov r1=rr[r3] |
| 145 | GLOBAL_ENTRY(kvm_asm_mov_from_rr) | 175 | GLOBAL_ENTRY(kvm_asm_mov_from_rr) |
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index b1dc80952d91..a8f84da04b49 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
| @@ -652,20 +652,25 @@ void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs, | |||
| 652 | unsigned long isr, unsigned long iim) | 652 | unsigned long isr, unsigned long iim) |
| 653 | { | 653 | { |
| 654 | struct kvm_vcpu *v = current_vcpu; | 654 | struct kvm_vcpu *v = current_vcpu; |
| 655 | long psr; | ||
| 655 | 656 | ||
| 656 | if (ia64_psr(regs)->cpl == 0) { | 657 | if (ia64_psr(regs)->cpl == 0) { |
| 657 | /* Allow hypercalls only when cpl = 0. */ | 658 | /* Allow hypercalls only when cpl = 0. */ |
| 658 | if (iim == DOMN_PAL_REQUEST) { | 659 | if (iim == DOMN_PAL_REQUEST) { |
| 660 | local_irq_save(psr); | ||
| 659 | set_pal_call_data(v); | 661 | set_pal_call_data(v); |
| 660 | vmm_transition(v); | 662 | vmm_transition(v); |
| 661 | get_pal_call_result(v); | 663 | get_pal_call_result(v); |
| 662 | vcpu_increment_iip(v); | 664 | vcpu_increment_iip(v); |
| 665 | local_irq_restore(psr); | ||
| 663 | return; | 666 | return; |
| 664 | } else if (iim == DOMN_SAL_REQUEST) { | 667 | } else if (iim == DOMN_SAL_REQUEST) { |
| 668 | local_irq_save(psr); | ||
| 665 | set_sal_call_data(v); | 669 | set_sal_call_data(v); |
| 666 | vmm_transition(v); | 670 | vmm_transition(v); |
| 667 | get_sal_call_result(v); | 671 | get_sal_call_result(v); |
| 668 | vcpu_increment_iip(v); | 672 | vcpu_increment_iip(v); |
| 673 | local_irq_restore(psr); | ||
| 669 | return; | 674 | return; |
| 670 | } | 675 | } |
| 671 | } | 676 | } |
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index a18ee17b9192..a2c6c15e4761 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
| @@ -788,13 +788,29 @@ void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, | |||
| 788 | setfpreg(reg, val, regs); /* FIXME: handle NATs later*/ | 788 | setfpreg(reg, val, regs); /* FIXME: handle NATs later*/ |
| 789 | } | 789 | } |
| 790 | 790 | ||
| 791 | /* | ||
| 792 | * The Altix RTC is mapped specially here for the vmm module | ||
| 793 | */ | ||
| 794 | #define SN_RTC_BASE (u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT)) | ||
| 795 | static long kvm_get_itc(struct kvm_vcpu *vcpu) | ||
| 796 | { | ||
| 797 | #if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) | ||
| 798 | struct kvm *kvm = (struct kvm *)KVM_VM_BASE; | ||
| 799 | |||
| 800 | if (kvm->arch.is_sn2) | ||
| 801 | return (*SN_RTC_BASE); | ||
| 802 | else | ||
| 803 | #endif | ||
| 804 | return ia64_getreg(_IA64_REG_AR_ITC); | ||
| 805 | } | ||
| 806 | |||
| 791 | /************************************************************************ | 807 | /************************************************************************ |
| 792 | * lsapic timer | 808 | * lsapic timer |
| 793 | ***********************************************************************/ | 809 | ***********************************************************************/ |
| 794 | u64 vcpu_get_itc(struct kvm_vcpu *vcpu) | 810 | u64 vcpu_get_itc(struct kvm_vcpu *vcpu) |
| 795 | { | 811 | { |
| 796 | unsigned long guest_itc; | 812 | unsigned long guest_itc; |
| 797 | guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC); | 813 | guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu); |
| 798 | 814 | ||
| 799 | if (guest_itc >= VMX(vcpu, last_itc)) { | 815 | if (guest_itc >= VMX(vcpu, last_itc)) { |
| 800 | VMX(vcpu, last_itc) = guest_itc; | 816 | VMX(vcpu, last_itc) = guest_itc; |
| @@ -809,7 +825,7 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val) | |||
| 809 | struct kvm_vcpu *v; | 825 | struct kvm_vcpu *v; |
| 810 | struct kvm *kvm; | 826 | struct kvm *kvm; |
| 811 | int i; | 827 | int i; |
| 812 | long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC); | 828 | long itc_offset = val - kvm_get_itc(vcpu); |
| 813 | unsigned long vitv = VCPU(vcpu, itv); | 829 | unsigned long vitv = VCPU(vcpu, itv); |
| 814 | 830 | ||
| 815 | kvm = (struct kvm *)KVM_VM_BASE; | 831 | kvm = (struct kvm *)KVM_VM_BASE; |
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
index 9eee5c04bacc..f4b4c899bb6c 100644
--- a/arch/ia64/kvm/vmm.c
+++ b/arch/ia64/kvm/vmm.c
| @@ -30,15 +30,19 @@ MODULE_AUTHOR("Intel"); | |||
| 30 | MODULE_LICENSE("GPL"); | 30 | MODULE_LICENSE("GPL"); |
| 31 | 31 | ||
| 32 | extern char kvm_ia64_ivt; | 32 | extern char kvm_ia64_ivt; |
| 33 | extern char kvm_asm_mov_from_ar; | ||
| 34 | extern char kvm_asm_mov_from_ar_sn2; | ||
| 33 | extern fpswa_interface_t *vmm_fpswa_interface; | 35 | extern fpswa_interface_t *vmm_fpswa_interface; |
| 34 | 36 | ||
| 35 | long vmm_sanity = 1; | 37 | long vmm_sanity = 1; |
| 36 | 38 | ||
| 37 | struct kvm_vmm_info vmm_info = { | 39 | struct kvm_vmm_info vmm_info = { |
| 38 | .module = THIS_MODULE, | 40 | .module = THIS_MODULE, |
| 39 | .vmm_entry = vmm_entry, | 41 | .vmm_entry = vmm_entry, |
| 40 | .tramp_entry = vmm_trampoline, | 42 | .tramp_entry = vmm_trampoline, |
| 41 | .vmm_ivt = (unsigned long)&kvm_ia64_ivt, | 43 | .vmm_ivt = (unsigned long)&kvm_ia64_ivt, |
| 44 | .patch_mov_ar = (unsigned long)&kvm_asm_mov_from_ar, | ||
| 45 | .patch_mov_ar_sn2 = (unsigned long)&kvm_asm_mov_from_ar_sn2, | ||
| 42 | }; | 46 | }; |
| 43 | 47 | ||
| 44 | static int __init kvm_vmm_init(void) | 48 | static int __init kvm_vmm_init(void) |
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
index 3ef1a017a318..40920c630649 100644
--- a/arch/ia64/kvm/vmm_ivt.S
+++ b/arch/ia64/kvm/vmm_ivt.S
| @@ -95,7 +95,7 @@ GLOBAL_ENTRY(kvm_vmm_panic) | |||
| 95 | ;; | 95 | ;; |
| 96 | srlz.i // guarantee that interruption collection is on | 96 | srlz.i // guarantee that interruption collection is on |
| 97 | ;; | 97 | ;; |
| 98 | //(p15) ssm psr.i // restore psr.i | 98 | (p15) ssm psr.i // restore psr. |
| 99 | addl r14=@gprel(ia64_leave_hypervisor),gp | 99 | addl r14=@gprel(ia64_leave_hypervisor),gp |
| 100 | ;; | 100 | ;; |
| 101 | KVM_SAVE_REST | 101 | KVM_SAVE_REST |
| @@ -249,7 +249,7 @@ ENTRY(kvm_break_fault) | |||
| 249 | ;; | 249 | ;; |
| 250 | srlz.i // guarantee that interruption collection is on | 250 | srlz.i // guarantee that interruption collection is on |
| 251 | ;; | 251 | ;; |
| 252 | //(p15)ssm psr.i // restore psr.i | 252 | (p15)ssm psr.i // restore psr.i |
| 253 | addl r14=@gprel(ia64_leave_hypervisor),gp | 253 | addl r14=@gprel(ia64_leave_hypervisor),gp |
| 254 | ;; | 254 | ;; |
| 255 | KVM_SAVE_REST | 255 | KVM_SAVE_REST |
| @@ -439,7 +439,7 @@ kvm_dispatch_vexirq: | |||
| 439 | ;; | 439 | ;; |
| 440 | srlz.i // guarantee that interruption collection is on | 440 | srlz.i // guarantee that interruption collection is on |
| 441 | ;; | 441 | ;; |
| 442 | //(p15) ssm psr.i // restore psr.i | 442 | (p15) ssm psr.i // restore psr.i |
| 443 | adds r3=8,r2 // set up second base pointer | 443 | adds r3=8,r2 // set up second base pointer |
| 444 | ;; | 444 | ;; |
| 445 | KVM_SAVE_REST | 445 | KVM_SAVE_REST |
| @@ -819,7 +819,7 @@ ENTRY(kvm_dtlb_miss_dispatch) | |||
| 819 | ;; | 819 | ;; |
| 820 | srlz.i // guarantee that interruption collection is on | 820 | srlz.i // guarantee that interruption collection is on |
| 821 | ;; | 821 | ;; |
| 822 | //(p15) ssm psr.i // restore psr.i | 822 | (p15) ssm psr.i // restore psr.i |
| 823 | addl r14=@gprel(ia64_leave_hypervisor_prepare),gp | 823 | addl r14=@gprel(ia64_leave_hypervisor_prepare),gp |
| 824 | ;; | 824 | ;; |
| 825 | KVM_SAVE_REST | 825 | KVM_SAVE_REST |
| @@ -842,7 +842,7 @@ ENTRY(kvm_itlb_miss_dispatch) | |||
| 842 | ;; | 842 | ;; |
| 843 | srlz.i // guarantee that interruption collection is on | 843 | srlz.i // guarantee that interruption collection is on |
| 844 | ;; | 844 | ;; |
| 845 | //(p15) ssm psr.i // restore psr.i | 845 | (p15) ssm psr.i // restore psr.i |
| 846 | addl r14=@gprel(ia64_leave_hypervisor),gp | 846 | addl r14=@gprel(ia64_leave_hypervisor),gp |
| 847 | ;; | 847 | ;; |
| 848 | KVM_SAVE_REST | 848 | KVM_SAVE_REST |
| @@ -871,7 +871,7 @@ ENTRY(kvm_dispatch_reflection) | |||
| 871 | ;; | 871 | ;; |
| 872 | srlz.i // guarantee that interruption collection is on | 872 | srlz.i // guarantee that interruption collection is on |
| 873 | ;; | 873 | ;; |
| 874 | //(p15) ssm psr.i // restore psr.i | 874 | (p15) ssm psr.i // restore psr.i |
| 875 | addl r14=@gprel(ia64_leave_hypervisor),gp | 875 | addl r14=@gprel(ia64_leave_hypervisor),gp |
| 876 | ;; | 876 | ;; |
| 877 | KVM_SAVE_REST | 877 | KVM_SAVE_REST |
| @@ -898,7 +898,7 @@ ENTRY(kvm_dispatch_virtualization_fault) | |||
| 898 | ;; | 898 | ;; |
| 899 | srlz.i // guarantee that interruption collection is on | 899 | srlz.i // guarantee that interruption collection is on |
| 900 | ;; | 900 | ;; |
| 901 | //(p15) ssm psr.i // restore psr.i | 901 | (p15) ssm psr.i // restore psr.i |
| 902 | addl r14=@gprel(ia64_leave_hypervisor_prepare),gp | 902 | addl r14=@gprel(ia64_leave_hypervisor_prepare),gp |
| 903 | ;; | 903 | ;; |
| 904 | KVM_SAVE_REST | 904 | KVM_SAVE_REST |
| @@ -920,7 +920,7 @@ ENTRY(kvm_dispatch_interrupt) | |||
| 920 | ;; | 920 | ;; |
| 921 | srlz.i | 921 | srlz.i |
| 922 | ;; | 922 | ;; |
| 923 | //(p15) ssm psr.i | 923 | (p15) ssm psr.i |
| 924 | addl r14=@gprel(ia64_leave_hypervisor),gp | 924 | addl r14=@gprel(ia64_leave_hypervisor),gp |
| 925 | ;; | 925 | ;; |
| 926 | KVM_SAVE_REST | 926 | KVM_SAVE_REST |
| @@ -1333,7 +1333,7 @@ hostret = r24 | |||
| 1333 | ;; | 1333 | ;; |
| 1334 | (p7) srlz.i | 1334 | (p7) srlz.i |
| 1335 | ;; | 1335 | ;; |
| 1336 | //(p6) ssm psr.i | 1336 | (p6) ssm psr.i |
| 1337 | ;; | 1337 | ;; |
| 1338 | mov rp=rpsave | 1338 | mov rp=rpsave |
| 1339 | mov ar.pfs=pfssave | 1339 | mov ar.pfs=pfssave |
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index 2c2501f13159..4290a429bf7c 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
| @@ -254,7 +254,8 @@ u64 guest_vhpt_lookup(u64 iha, u64 *pte) | |||
| 254 | "(p7) st8 [%2]=r9;;" | 254 | "(p7) st8 [%2]=r9;;" |
| 255 | "ssm psr.ic;;" | 255 | "ssm psr.ic;;" |
| 256 | "srlz.d;;" | 256 | "srlz.d;;" |
| 257 | /* "ssm psr.i;;" Once interrupts in vmm open, need fix*/ | 257 | "ssm psr.i;;" |
| 258 | "srlz.d;;" | ||
| 258 | : "=r"(ret) : "r"(iha), "r"(pte):"memory"); | 259 | : "=r"(ret) : "r"(iha), "r"(pte):"memory"); |
| 259 | 260 | ||
| 260 | return ret; | 261 | return ret; |
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 9057335fdc61..2cf915e51e7e 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
| @@ -41,6 +41,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) | |||
| 41 | return !!(v->arch.pending_exceptions); | 41 | return !!(v->arch.pending_exceptions); |
| 42 | } | 42 | } |
| 43 | 43 | ||
| 44 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) | ||
| 45 | { | ||
| 46 | /* do real check here */ | ||
| 47 | return 1; | ||
| 48 | } | ||
| 49 | |||
| 44 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) | 50 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) |
| 45 | { | 51 | { |
| 46 | return !(v->arch.msr & MSR_WE); | 52 | return !(v->arch.msr & MSR_WE); |
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 54ea39f96ecd..a27d0d5a6f86 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
| @@ -13,6 +13,8 @@ | |||
| 13 | 13 | ||
| 14 | #ifndef ASM_KVM_HOST_H | 14 | #ifndef ASM_KVM_HOST_H |
| 15 | #define ASM_KVM_HOST_H | 15 | #define ASM_KVM_HOST_H |
| 16 | #include <linux/hrtimer.h> | ||
| 17 | #include <linux/interrupt.h> | ||
| 16 | #include <linux/kvm_host.h> | 18 | #include <linux/kvm_host.h> |
| 17 | #include <asm/debug.h> | 19 | #include <asm/debug.h> |
| 18 | #include <asm/cpuid.h> | 20 | #include <asm/cpuid.h> |
| @@ -210,7 +212,8 @@ struct kvm_vcpu_arch { | |||
| 210 | s390_fp_regs guest_fpregs; | 212 | s390_fp_regs guest_fpregs; |
| 211 | unsigned int guest_acrs[NUM_ACRS]; | 213 | unsigned int guest_acrs[NUM_ACRS]; |
| 212 | struct kvm_s390_local_interrupt local_int; | 214 | struct kvm_s390_local_interrupt local_int; |
| 213 | struct timer_list ckc_timer; | 215 | struct hrtimer ckc_timer; |
| 216 | struct tasklet_struct tasklet; | ||
| 214 | union { | 217 | union { |
| 215 | cpuid_t cpu_id; | 218 | cpuid_t cpu_id; |
| 216 | u64 stidp_data; | 219 | u64 stidp_data; |
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 9d19803111ba..98997ccba501 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
| @@ -154,17 +154,25 @@ static int handle_stop(struct kvm_vcpu *vcpu) | |||
| 154 | static int handle_validity(struct kvm_vcpu *vcpu) | 154 | static int handle_validity(struct kvm_vcpu *vcpu) |
| 155 | { | 155 | { |
| 156 | int viwhy = vcpu->arch.sie_block->ipb >> 16; | 156 | int viwhy = vcpu->arch.sie_block->ipb >> 16; |
| 157 | int rc; | ||
| 158 | |||
| 157 | vcpu->stat.exit_validity++; | 159 | vcpu->stat.exit_validity++; |
| 158 | if (viwhy == 0x37) { | 160 | if ((viwhy == 0x37) && (vcpu->arch.sie_block->prefix |
| 159 | fault_in_pages_writeable((char __user *) | 161 | <= vcpu->kvm->arch.guest_memsize - 2*PAGE_SIZE)){ |
| 160 | vcpu->kvm->arch.guest_origin + | 162 | rc = fault_in_pages_writeable((char __user *) |
| 161 | vcpu->arch.sie_block->prefix, | 163 | vcpu->kvm->arch.guest_origin + |
| 162 | PAGE_SIZE); | 164 | vcpu->arch.sie_block->prefix, |
| 163 | return 0; | 165 | 2*PAGE_SIZE); |
| 164 | } | 166 | if (rc) |
| 165 | VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d", | 167 | /* user will receive sigsegv, exit to user */ |
| 166 | viwhy); | 168 | rc = -ENOTSUPP; |
| 167 | return -ENOTSUPP; | 169 | } else |
| 170 | rc = -ENOTSUPP; | ||
| 171 | |||
| 172 | if (rc) | ||
| 173 | VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d", | ||
| 174 | viwhy); | ||
| 175 | return rc; | ||
| 168 | } | 176 | } |
| 169 | 177 | ||
| 170 | static int handle_instruction(struct kvm_vcpu *vcpu) | 178 | static int handle_instruction(struct kvm_vcpu *vcpu) |
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 0189356fe209..f04f5301b1b4 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
| @@ -12,6 +12,8 @@ | |||
| 12 | 12 | ||
| 13 | #include <asm/lowcore.h> | 13 | #include <asm/lowcore.h> |
| 14 | #include <asm/uaccess.h> | 14 | #include <asm/uaccess.h> |
| 15 | #include <linux/hrtimer.h> | ||
| 16 | #include <linux/interrupt.h> | ||
| 15 | #include <linux/kvm_host.h> | 17 | #include <linux/kvm_host.h> |
| 16 | #include <linux/signal.h> | 18 | #include <linux/signal.h> |
| 17 | #include "kvm-s390.h" | 19 | #include "kvm-s390.h" |
| @@ -299,13 +301,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) | |||
| 299 | } | 301 | } |
| 300 | 302 | ||
| 301 | if ((!rc) && atomic_read(&fi->active)) { | 303 | if ((!rc) && atomic_read(&fi->active)) { |
| 302 | spin_lock_bh(&fi->lock); | 304 | spin_lock(&fi->lock); |
| 303 | list_for_each_entry(inti, &fi->list, list) | 305 | list_for_each_entry(inti, &fi->list, list) |
| 304 | if (__interrupt_is_deliverable(vcpu, inti)) { | 306 | if (__interrupt_is_deliverable(vcpu, inti)) { |
| 305 | rc = 1; | 307 | rc = 1; |
| 306 | break; | 308 | break; |
| 307 | } | 309 | } |
| 308 | spin_unlock_bh(&fi->lock); | 310 | spin_unlock(&fi->lock); |
| 309 | } | 311 | } |
| 310 | 312 | ||
| 311 | if ((!rc) && (vcpu->arch.sie_block->ckc < | 313 | if ((!rc) && (vcpu->arch.sie_block->ckc < |
| @@ -318,6 +320,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) | |||
| 318 | return rc; | 320 | return rc; |
| 319 | } | 321 | } |
| 320 | 322 | ||
| 323 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) | ||
| 324 | { | ||
| 325 | /* do real check here */ | ||
| 326 | return 1; | ||
| 327 | } | ||
| 328 | |||
| 321 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) | 329 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) |
| 322 | { | 330 | { |
| 323 | return 0; | 331 | return 0; |
| @@ -355,14 +363,12 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) | |||
| 355 | return 0; | 363 | return 0; |
| 356 | } | 364 | } |
| 357 | 365 | ||
| 358 | sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1; | 366 | sltime = ((vcpu->arch.sie_block->ckc - now)*125)>>9; |
| 359 | 367 | ||
| 360 | vcpu->arch.ckc_timer.expires = jiffies + sltime; | 368 | hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); |
| 361 | 369 | VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); | |
| 362 | add_timer(&vcpu->arch.ckc_timer); | ||
| 363 | VCPU_EVENT(vcpu, 5, "enabled wait timer:%llx jiffies", sltime); | ||
| 364 | no_timer: | 370 | no_timer: |
| 365 | spin_lock_bh(&vcpu->arch.local_int.float_int->lock); | 371 | spin_lock(&vcpu->arch.local_int.float_int->lock); |
| 366 | spin_lock_bh(&vcpu->arch.local_int.lock); | 372 | spin_lock_bh(&vcpu->arch.local_int.lock); |
| 367 | add_wait_queue(&vcpu->arch.local_int.wq, &wait); | 373 | add_wait_queue(&vcpu->arch.local_int.wq, &wait); |
| 368 | while (list_empty(&vcpu->arch.local_int.list) && | 374 | while (list_empty(&vcpu->arch.local_int.list) && |
| @@ -371,33 +377,46 @@ no_timer: | |||
| 371 | !signal_pending(current)) { | 377 | !signal_pending(current)) { |
| 372 | set_current_state(TASK_INTERRUPTIBLE); | 378 | set_current_state(TASK_INTERRUPTIBLE); |
| 373 | spin_unlock_bh(&vcpu->arch.local_int.lock); | 379 | spin_unlock_bh(&vcpu->arch.local_int.lock); |
| 374 | spin_unlock_bh(&vcpu->arch.local_int.float_int->lock); | 380 | spin_unlock(&vcpu->arch.local_int.float_int->lock); |
| 375 | vcpu_put(vcpu); | 381 | vcpu_put(vcpu); |
| 376 | schedule(); | 382 | schedule(); |
| 377 | vcpu_load(vcpu); | 383 | vcpu_load(vcpu); |
| 378 | spin_lock_bh(&vcpu->arch.local_int.float_int->lock); | 384 | spin_lock(&vcpu->arch.local_int.float_int->lock); |
| 379 | spin_lock_bh(&vcpu->arch.local_int.lock); | 385 | spin_lock_bh(&vcpu->arch.local_int.lock); |
| 380 | } | 386 | } |
| 381 | __unset_cpu_idle(vcpu); | 387 | __unset_cpu_idle(vcpu); |
| 382 | __set_current_state(TASK_RUNNING); | 388 | __set_current_state(TASK_RUNNING); |
| 383 | remove_wait_queue(&vcpu->wq, &wait); | 389 | remove_wait_queue(&vcpu->wq, &wait); |
| 384 | spin_unlock_bh(&vcpu->arch.local_int.lock); | 390 | spin_unlock_bh(&vcpu->arch.local_int.lock); |
| 385 | spin_unlock_bh(&vcpu->arch.local_int.float_int->lock); | 391 | spin_unlock(&vcpu->arch.local_int.float_int->lock); |
| 386 | del_timer(&vcpu->arch.ckc_timer); | 392 | hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); |
| 387 | return 0; | 393 | return 0; |
| 388 | } | 394 | } |
| 389 | 395 | ||
| 390 | void kvm_s390_idle_wakeup(unsigned long data) | 396 | void kvm_s390_tasklet(unsigned long parm) |
| 391 | { | 397 | { |
| 392 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; | 398 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm; |
| 393 | 399 | ||
| 394 | spin_lock_bh(&vcpu->arch.local_int.lock); | 400 | spin_lock(&vcpu->arch.local_int.lock); |
| 395 | vcpu->arch.local_int.timer_due = 1; | 401 | vcpu->arch.local_int.timer_due = 1; |
| 396 | if (waitqueue_active(&vcpu->arch.local_int.wq)) | 402 | if (waitqueue_active(&vcpu->arch.local_int.wq)) |
| 397 | wake_up_interruptible(&vcpu->arch.local_int.wq); | 403 | wake_up_interruptible(&vcpu->arch.local_int.wq); |
| 398 | spin_unlock_bh(&vcpu->arch.local_int.lock); | 404 | spin_unlock(&vcpu->arch.local_int.lock); |
| 399 | } | 405 | } |
| 400 | 406 | ||
| 407 | /* | ||
| 408 | * low level hrtimer wake routine. Because this runs in hardirq context | ||
| 409 | * we schedule a tasklet to do the real work. | ||
| 410 | */ | ||
| 411 | enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) | ||
| 412 | { | ||
| 413 | struct kvm_vcpu *vcpu; | ||
| 414 | |||
| 415 | vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); | ||
| 416 | tasklet_schedule(&vcpu->arch.tasklet); | ||
| 417 | |||
| 418 | return HRTIMER_NORESTART; | ||
| 419 | } | ||
| 401 | 420 | ||
| 402 | void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) | 421 | void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) |
| 403 | { | 422 | { |
| @@ -436,7 +455,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) | |||
| 436 | if (atomic_read(&fi->active)) { | 455 | if (atomic_read(&fi->active)) { |
| 437 | do { | 456 | do { |
| 438 | deliver = 0; | 457 | deliver = 0; |
| 439 | spin_lock_bh(&fi->lock); | 458 | spin_lock(&fi->lock); |
| 440 | list_for_each_entry_safe(inti, n, &fi->list, list) { | 459 | list_for_each_entry_safe(inti, n, &fi->list, list) { |
| 441 | if (__interrupt_is_deliverable(vcpu, inti)) { | 460 | if (__interrupt_is_deliverable(vcpu, inti)) { |
| 442 | list_del(&inti->list); | 461 | list_del(&inti->list); |
| @@ -447,7 +466,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) | |||
| 447 | } | 466 | } |
| 448 | if (list_empty(&fi->list)) | 467 | if (list_empty(&fi->list)) |
| 449 | atomic_set(&fi->active, 0); | 468 | atomic_set(&fi->active, 0); |
| 450 | spin_unlock_bh(&fi->lock); | 469 | spin_unlock(&fi->lock); |
| 451 | if (deliver) { | 470 | if (deliver) { |
| 452 | __do_deliver_interrupt(vcpu, inti); | 471 | __do_deliver_interrupt(vcpu, inti); |
| 453 | kfree(inti); | 472 | kfree(inti); |
| @@ -512,7 +531,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, | |||
| 512 | 531 | ||
| 513 | mutex_lock(&kvm->lock); | 532 | mutex_lock(&kvm->lock); |
| 514 | fi = &kvm->arch.float_int; | 533 | fi = &kvm->arch.float_int; |
| 515 | spin_lock_bh(&fi->lock); | 534 | spin_lock(&fi->lock); |
| 516 | list_add_tail(&inti->list, &fi->list); | 535 | list_add_tail(&inti->list, &fi->list); |
| 517 | atomic_set(&fi->active, 1); | 536 | atomic_set(&fi->active, 1); |
| 518 | sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); | 537 | sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); |
| @@ -529,7 +548,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, | |||
| 529 | if (waitqueue_active(&li->wq)) | 548 | if (waitqueue_active(&li->wq)) |
| 530 | wake_up_interruptible(&li->wq); | 549 | wake_up_interruptible(&li->wq); |
| 531 | spin_unlock_bh(&li->lock); | 550 | spin_unlock_bh(&li->lock); |
| 532 | spin_unlock_bh(&fi->lock); | 551 | spin_unlock(&fi->lock); |
| 533 | mutex_unlock(&kvm->lock); | 552 | mutex_unlock(&kvm->lock); |
| 534 | return 0; | 553 | return 0; |
| 535 | } | 554 | } |
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index f4d56e9939c9..10bccd1f8aee 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <linux/compiler.h> | 15 | #include <linux/compiler.h> |
| 16 | #include <linux/err.h> | 16 | #include <linux/err.h> |
| 17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
| 18 | #include <linux/hrtimer.h> | ||
| 18 | #include <linux/init.h> | 19 | #include <linux/init.h> |
| 19 | #include <linux/kvm.h> | 20 | #include <linux/kvm.h> |
| 20 | #include <linux/kvm_host.h> | 21 | #include <linux/kvm_host.h> |
| @@ -195,6 +196,10 @@ out_nokvm: | |||
| 195 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | 196 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) |
| 196 | { | 197 | { |
| 197 | VCPU_EVENT(vcpu, 3, "%s", "free cpu"); | 198 | VCPU_EVENT(vcpu, 3, "%s", "free cpu"); |
| 199 | if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == | ||
| 200 | (__u64) vcpu->arch.sie_block) | ||
| 201 | vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; | ||
| 202 | smp_mb(); | ||
| 198 | free_page((unsigned long)(vcpu->arch.sie_block)); | 203 | free_page((unsigned long)(vcpu->arch.sie_block)); |
| 199 | kvm_vcpu_uninit(vcpu); | 204 | kvm_vcpu_uninit(vcpu); |
| 200 | kfree(vcpu); | 205 | kfree(vcpu); |
| @@ -283,8 +288,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
| 283 | vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin; | 288 | vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin; |
| 284 | vcpu->arch.sie_block->ecb = 2; | 289 | vcpu->arch.sie_block->ecb = 2; |
| 285 | vcpu->arch.sie_block->eca = 0xC1002001U; | 290 | vcpu->arch.sie_block->eca = 0xC1002001U; |
| 286 | setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, | 291 | hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); |
| 287 | (unsigned long) vcpu); | 292 | tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, |
| 293 | (unsigned long) vcpu); | ||
| 294 | vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; | ||
| 288 | get_cpu_id(&vcpu->arch.cpu_id); | 295 | get_cpu_id(&vcpu->arch.cpu_id); |
| 289 | vcpu->arch.cpu_id.version = 0xff; | 296 | vcpu->arch.cpu_id.version = 0xff; |
| 290 | return 0; | 297 | return 0; |
| @@ -307,19 +314,21 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, | |||
| 307 | 314 | ||
| 308 | vcpu->arch.sie_block->icpua = id; | 315 | vcpu->arch.sie_block->icpua = id; |
| 309 | BUG_ON(!kvm->arch.sca); | 316 | BUG_ON(!kvm->arch.sca); |
| 310 | BUG_ON(kvm->arch.sca->cpu[id].sda); | 317 | if (!kvm->arch.sca->cpu[id].sda) |
| 311 | kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; | 318 | kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; |
| 319 | else | ||
| 320 | BUG_ON(!kvm->vcpus[id]); /* vcpu already exists */ ||
| 312 | vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); | 321 | vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); |
| 313 | vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; | 322 | vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; |
| 314 | 323 | ||
| 315 | spin_lock_init(&vcpu->arch.local_int.lock); | 324 | spin_lock_init(&vcpu->arch.local_int.lock); |
| 316 | INIT_LIST_HEAD(&vcpu->arch.local_int.list); | 325 | INIT_LIST_HEAD(&vcpu->arch.local_int.list); |
| 317 | vcpu->arch.local_int.float_int = &kvm->arch.float_int; | 326 | vcpu->arch.local_int.float_int = &kvm->arch.float_int; |
| 318 | spin_lock_bh(&kvm->arch.float_int.lock); | 327 | spin_lock(&kvm->arch.float_int.lock); |
| 319 | kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; | 328 | kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; |
| 320 | init_waitqueue_head(&vcpu->arch.local_int.wq); | 329 | init_waitqueue_head(&vcpu->arch.local_int.wq); |
| 321 | vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; | 330 | vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; |
| 322 | spin_unlock_bh(&kvm->arch.float_int.lock); | 331 | spin_unlock(&kvm->arch.float_int.lock); |
| 323 | 332 | ||
| 324 | rc = kvm_vcpu_init(vcpu, kvm, id); | 333 | rc = kvm_vcpu_init(vcpu, kvm, id); |
| 325 | if (rc) | 334 | if (rc) |
| @@ -478,6 +487,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 478 | 487 | ||
| 479 | vcpu_load(vcpu); | 488 | vcpu_load(vcpu); |
| 480 | 489 | ||
| 490 | /* verify that memory has been registered */ ||
| 491 | if (!vcpu->kvm->arch.guest_memsize) { | ||
| 492 | vcpu_put(vcpu); | ||
| 493 | return -EINVAL; | ||
| 494 | } | ||
| 495 | |||
| 481 | if (vcpu->sigset_active) | 496 | if (vcpu->sigset_active) |
| 482 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | 497 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); |
| 483 | 498 | ||
| @@ -657,6 +672,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
| 657 | struct kvm_memory_slot old, | 672 | struct kvm_memory_slot old, |
| 658 | int user_alloc) | 673 | int user_alloc) |
| 659 | { | 674 | { |
| 675 | int i; | ||
| 676 | |||
| 660 | /* A few sanity checks. We can have exactly one memory slot which has | 677 | /* A few sanity checks. We can have exactly one memory slot which has |
| 661 | to start at guest virtual zero and which has to be located at a | 678 | to start at guest virtual zero and which has to be located at a |
| 662 | page boundary in userland and which has to end at a page boundary. | 679 | page boundary in userland and which has to end at a page boundary. |
| @@ -664,7 +681,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
| 664 | vmas. It is okay to mmap() and munmap() stuff in this slot after | 681 | vmas. It is okay to mmap() and munmap() stuff in this slot after |
| 665 | doing this call at any time */ | 682 | doing this call at any time */ |
| 666 | 683 | ||
| 667 | if (mem->slot) | 684 | if (mem->slot || kvm->arch.guest_memsize) |
| 668 | return -EINVAL; | 685 | return -EINVAL; |
| 669 | 686 | ||
| 670 | if (mem->guest_phys_addr) | 687 | if (mem->guest_phys_addr) |
| @@ -676,15 +693,39 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
| 676 | if (mem->memory_size & (PAGE_SIZE - 1)) | 693 | if (mem->memory_size & (PAGE_SIZE - 1)) |
| 677 | return -EINVAL; | 694 | return -EINVAL; |
| 678 | 695 | ||
| 696 | if (!user_alloc) | ||
| 697 | return -EINVAL; | ||
| 698 | |||
| 699 | /* lock all vcpus */ | ||
| 700 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | ||
| 701 | if (!kvm->vcpus[i]) | ||
| 702 | continue; | ||
| 703 | if (!mutex_trylock(&kvm->vcpus[i]->mutex)) | ||
| 704 | goto fail_out; | ||
| 705 | } | ||
| 706 | |||
| 679 | kvm->arch.guest_origin = mem->userspace_addr; | 707 | kvm->arch.guest_origin = mem->userspace_addr; |
| 680 | kvm->arch.guest_memsize = mem->memory_size; | 708 | kvm->arch.guest_memsize = mem->memory_size; |
| 681 | 709 | ||
| 682 | /* FIXME: we do want to interrupt running CPUs and update their memory | 710 | /* update sie control blocks, and unlock all vcpus */ |
| 683 | configuration now to avoid race conditions. But hey, changing the | 711 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { |
| 684 | memory layout while virtual CPUs are running is usually bad | 712 | if (kvm->vcpus[i]) { |
| 685 | programming practice. */ | 713 | kvm->vcpus[i]->arch.sie_block->gmsor = |
| 714 | kvm->arch.guest_origin; | ||
| 715 | kvm->vcpus[i]->arch.sie_block->gmslm = | ||
| 716 | kvm->arch.guest_memsize + | ||
| 717 | kvm->arch.guest_origin + | ||
| 718 | VIRTIODESCSPACE - 1ul; | ||
| 719 | mutex_unlock(&kvm->vcpus[i]->mutex); | ||
| 720 | } | ||
| 721 | } | ||
| 686 | 722 | ||
| 687 | return 0; | 723 | return 0; |
| 724 | |||
| 725 | fail_out: | ||
| 726 | for (; i >= 0; i--) | ||
| 727 | mutex_unlock(&kvm->vcpus[i]->mutex); | ||
| 728 | return -EINVAL; | ||
| 688 | } | 729 | } |
| 689 | 730 | ||
| 690 | void kvm_arch_flush_shadow(struct kvm *kvm) | 731 | void kvm_arch_flush_shadow(struct kvm *kvm) |
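For orientation, the locking scheme introduced in kvm_arch_set_memory_region() above, reduced to its shape (the helper name, callback parameter and error handling here are illustrative only): every existing vcpu is trylocked so the ioctl fails instead of blocking when a vcpu is busy, the SIE control blocks are rewritten while no vcpu can reenter guest mode, and everything acquired so far is released in reverse order on failure.

    static int for_all_vcpus_locked(struct kvm *kvm,
                                    void (*update)(struct kvm_vcpu *))
    {
            int i;

            for (i = 0; i < KVM_MAX_VCPUS; ++i) {
                    if (!kvm->vcpus[i])
                            continue;
                    if (!mutex_trylock(&kvm->vcpus[i]->mutex))
                            goto rollback;
            }

            for (i = 0; i < KVM_MAX_VCPUS; ++i) {
                    if (kvm->vcpus[i]) {
                            update(kvm->vcpus[i]);  /* e.g. rewrite gmsor/gmslm */
                            mutex_unlock(&kvm->vcpus[i]->mutex);
                    }
            }
            return 0;

    rollback:
            while (--i >= 0)
                    if (kvm->vcpus[i])
                            mutex_unlock(&kvm->vcpus[i]->mutex);
            return -EINVAL;
    }

Holding the vcpu mutex is what guarantees the vcpu is not sitting in SIE while its gmsor/gmslm fields change, which is why a trylock failure aborts the whole operation rather than waiting.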
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 00bbe69b78da..748fee872323 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #ifndef ARCH_S390_KVM_S390_H | 14 | #ifndef ARCH_S390_KVM_S390_H |
| 15 | #define ARCH_S390_KVM_S390_H | 15 | #define ARCH_S390_KVM_S390_H |
| 16 | 16 | ||
| 17 | #include <linux/hrtimer.h> | ||
| 17 | #include <linux/kvm.h> | 18 | #include <linux/kvm.h> |
| 18 | #include <linux/kvm_host.h> | 19 | #include <linux/kvm_host.h> |
| 19 | 20 | ||
| @@ -41,7 +42,8 @@ static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu) | |||
| 41 | } | 42 | } |
| 42 | 43 | ||
| 43 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); | 44 | int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); |
| 44 | void kvm_s390_idle_wakeup(unsigned long data); | 45 | enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); |
| 46 | void kvm_s390_tasklet(unsigned long parm); | ||
| 45 | void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); | 47 | void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); |
| 46 | int kvm_s390_inject_vm(struct kvm *kvm, | 48 | int kvm_s390_inject_vm(struct kvm *kvm, |
| 47 | struct kvm_s390_interrupt *s390int); | 49 | struct kvm_s390_interrupt *s390int); |
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 4b88834b8dd8..93ecd06e1a74 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
| @@ -204,11 +204,11 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) | |||
| 204 | int cpus = 0; | 204 | int cpus = 0; |
| 205 | int n; | 205 | int n; |
| 206 | 206 | ||
| 207 | spin_lock_bh(&fi->lock); | 207 | spin_lock(&fi->lock); |
| 208 | for (n = 0; n < KVM_MAX_VCPUS; n++) | 208 | for (n = 0; n < KVM_MAX_VCPUS; n++) |
| 209 | if (fi->local_int[n]) | 209 | if (fi->local_int[n]) |
| 210 | cpus++; | 210 | cpus++; |
| 211 | spin_unlock_bh(&fi->lock); | 211 | spin_unlock(&fi->lock); |
| 212 | 212 | ||
| 213 | /* deal with other level 3 hypervisors */ | 213 | /* deal with other level 3 hypervisors */ |
| 214 | if (stsi(mem, 3, 2, 2) == -ENOSYS) | 214 | if (stsi(mem, 3, 2, 2) == -ENOSYS) |
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index f27dbedf0866..36678835034d 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c | |||
| @@ -52,7 +52,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, | |||
| 52 | if (cpu_addr >= KVM_MAX_VCPUS) | 52 | if (cpu_addr >= KVM_MAX_VCPUS) |
| 53 | return 3; /* not operational */ | 53 | return 3; /* not operational */ |
| 54 | 54 | ||
| 55 | spin_lock_bh(&fi->lock); | 55 | spin_lock(&fi->lock); |
| 56 | if (fi->local_int[cpu_addr] == NULL) | 56 | if (fi->local_int[cpu_addr] == NULL) |
| 57 | rc = 3; /* not operational */ | 57 | rc = 3; /* not operational */ |
| 58 | else if (atomic_read(fi->local_int[cpu_addr]->cpuflags) | 58 | else if (atomic_read(fi->local_int[cpu_addr]->cpuflags) |
| @@ -64,7 +64,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, | |||
| 64 | *reg |= SIGP_STAT_STOPPED; | 64 | *reg |= SIGP_STAT_STOPPED; |
| 65 | rc = 1; /* status stored */ | 65 | rc = 1; /* status stored */ |
| 66 | } | 66 | } |
| 67 | spin_unlock_bh(&fi->lock); | 67 | spin_unlock(&fi->lock); |
| 68 | 68 | ||
| 69 | VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); | 69 | VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); |
| 70 | return rc; | 70 | return rc; |
| @@ -86,7 +86,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) | |||
| 86 | 86 | ||
| 87 | inti->type = KVM_S390_INT_EMERGENCY; | 87 | inti->type = KVM_S390_INT_EMERGENCY; |
| 88 | 88 | ||
| 89 | spin_lock_bh(&fi->lock); | 89 | spin_lock(&fi->lock); |
| 90 | li = fi->local_int[cpu_addr]; | 90 | li = fi->local_int[cpu_addr]; |
| 91 | if (li == NULL) { | 91 | if (li == NULL) { |
| 92 | rc = 3; /* not operational */ | 92 | rc = 3; /* not operational */ |
| @@ -102,7 +102,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) | |||
| 102 | spin_unlock_bh(&li->lock); | 102 | spin_unlock_bh(&li->lock); |
| 103 | rc = 0; /* order accepted */ | 103 | rc = 0; /* order accepted */ |
| 104 | unlock: | 104 | unlock: |
| 105 | spin_unlock_bh(&fi->lock); | 105 | spin_unlock(&fi->lock); |
| 106 | VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); | 106 | VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); |
| 107 | return rc; | 107 | return rc; |
| 108 | } | 108 | } |
| @@ -123,7 +123,7 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store) | |||
| 123 | 123 | ||
| 124 | inti->type = KVM_S390_SIGP_STOP; | 124 | inti->type = KVM_S390_SIGP_STOP; |
| 125 | 125 | ||
| 126 | spin_lock_bh(&fi->lock); | 126 | spin_lock(&fi->lock); |
| 127 | li = fi->local_int[cpu_addr]; | 127 | li = fi->local_int[cpu_addr]; |
| 128 | if (li == NULL) { | 128 | if (li == NULL) { |
| 129 | rc = 3; /* not operational */ | 129 | rc = 3; /* not operational */ |
| @@ -142,7 +142,7 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store) | |||
| 142 | spin_unlock_bh(&li->lock); | 142 | spin_unlock_bh(&li->lock); |
| 143 | rc = 0; /* order accepted */ | 143 | rc = 0; /* order accepted */ |
| 144 | unlock: | 144 | unlock: |
| 145 | spin_unlock_bh(&fi->lock); | 145 | spin_unlock(&fi->lock); |
| 146 | VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); | 146 | VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); |
| 147 | return rc; | 147 | return rc; |
| 148 | } | 148 | } |
| @@ -188,7 +188,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, | |||
| 188 | if (!inti) | 188 | if (!inti) |
| 189 | return 2; /* busy */ | 189 | return 2; /* busy */ |
| 190 | 190 | ||
| 191 | spin_lock_bh(&fi->lock); | 191 | spin_lock(&fi->lock); |
| 192 | li = fi->local_int[cpu_addr]; | 192 | li = fi->local_int[cpu_addr]; |
| 193 | 193 | ||
| 194 | if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) { | 194 | if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) { |
| @@ -220,7 +220,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, | |||
| 220 | out_li: | 220 | out_li: |
| 221 | spin_unlock_bh(&li->lock); | 221 | spin_unlock_bh(&li->lock); |
| 222 | out_fi: | 222 | out_fi: |
| 223 | spin_unlock_bh(&fi->lock); | 223 | spin_unlock(&fi->lock); |
| 224 | return rc; | 224 | return rc; |
| 225 | } | 225 | } |
| 226 | 226 | ||
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 19af42138f78..4a28d22d4793 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
| @@ -116,6 +116,8 @@ | |||
| 116 | #define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */ | 116 | #define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */ |
| 117 | #define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */ | 117 | #define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */ |
| 118 | #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ | 118 | #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ |
| 119 | #define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */ | ||
| 120 | #define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ | ||
| 119 | #define X86_FEATURE_AES (4*32+25) /* AES instructions */ | 121 | #define X86_FEATURE_AES (4*32+25) /* AES instructions */ |
| 120 | #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ | 122 | #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ |
| 121 | #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ | 123 | #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ |
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index dc3f6cf11704..125be8b19568 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #define __KVM_HAVE_MSI | 16 | #define __KVM_HAVE_MSI |
| 17 | #define __KVM_HAVE_USER_NMI | 17 | #define __KVM_HAVE_USER_NMI |
| 18 | #define __KVM_HAVE_GUEST_DEBUG | 18 | #define __KVM_HAVE_GUEST_DEBUG |
| 19 | #define __KVM_HAVE_MSIX | ||
| 19 | 20 | ||
| 20 | /* Architectural interrupt line count. */ | 21 | /* Architectural interrupt line count. */ |
| 21 | #define KVM_NR_INTERRUPTS 256 | 22 | #define KVM_NR_INTERRUPTS 256 |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f0faf58044ff..eabdc1cfab5c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
| @@ -185,6 +185,7 @@ union kvm_mmu_page_role { | |||
| 185 | unsigned access:3; | 185 | unsigned access:3; |
| 186 | unsigned invalid:1; | 186 | unsigned invalid:1; |
| 187 | unsigned cr4_pge:1; | 187 | unsigned cr4_pge:1; |
| 188 | unsigned nxe:1; | ||
| 188 | }; | 189 | }; |
| 189 | }; | 190 | }; |
| 190 | 191 | ||
| @@ -212,7 +213,6 @@ struct kvm_mmu_page { | |||
| 212 | int multimapped; /* More than one parent_pte? */ | 213 | int multimapped; /* More than one parent_pte? */ |
| 213 | int root_count; /* Currently serving as active root */ | 214 | int root_count; /* Currently serving as active root */ |
| 214 | bool unsync; | 215 | bool unsync; |
| 215 | bool global; | ||
| 216 | unsigned int unsync_children; | 216 | unsigned int unsync_children; |
| 217 | union { | 217 | union { |
| 218 | u64 *parent_pte; /* !multimapped */ | 218 | u64 *parent_pte; /* !multimapped */ |
| @@ -261,13 +261,11 @@ struct kvm_mmu { | |||
| 261 | union kvm_mmu_page_role base_role; | 261 | union kvm_mmu_page_role base_role; |
| 262 | 262 | ||
| 263 | u64 *pae_root; | 263 | u64 *pae_root; |
| 264 | u64 rsvd_bits_mask[2][4]; | ||
| 264 | }; | 265 | }; |
| 265 | 266 | ||
| 266 | struct kvm_vcpu_arch { | 267 | struct kvm_vcpu_arch { |
| 267 | u64 host_tsc; | 268 | u64 host_tsc; |
| 268 | int interrupt_window_open; | ||
| 269 | unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ | ||
| 270 | DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS); | ||
| 271 | /* | 269 | /* |
| 272 | * rip and regs accesses must go through | 270 | * rip and regs accesses must go through |
| 273 | * kvm_{register,rip}_{read,write} functions. | 271 | * kvm_{register,rip}_{read,write} functions. |
| @@ -286,6 +284,7 @@ struct kvm_vcpu_arch { | |||
| 286 | u64 shadow_efer; | 284 | u64 shadow_efer; |
| 287 | u64 apic_base; | 285 | u64 apic_base; |
| 288 | struct kvm_lapic *apic; /* kernel irqchip context */ | 286 | struct kvm_lapic *apic; /* kernel irqchip context */ |
| 287 | int32_t apic_arb_prio; | ||
| 289 | int mp_state; | 288 | int mp_state; |
| 290 | int sipi_vector; | 289 | int sipi_vector; |
| 291 | u64 ia32_misc_enable_msr; | 290 | u64 ia32_misc_enable_msr; |
| @@ -320,6 +319,8 @@ struct kvm_vcpu_arch { | |||
| 320 | struct kvm_pio_request pio; | 319 | struct kvm_pio_request pio; |
| 321 | void *pio_data; | 320 | void *pio_data; |
| 322 | 321 | ||
| 322 | u8 event_exit_inst_len; | ||
| 323 | |||
| 323 | struct kvm_queued_exception { | 324 | struct kvm_queued_exception { |
| 324 | bool pending; | 325 | bool pending; |
| 325 | bool has_error_code; | 326 | bool has_error_code; |
| @@ -329,11 +330,12 @@ struct kvm_vcpu_arch { | |||
| 329 | 330 | ||
| 330 | struct kvm_queued_interrupt { | 331 | struct kvm_queued_interrupt { |
| 331 | bool pending; | 332 | bool pending; |
| 333 | bool soft; | ||
| 332 | u8 nr; | 334 | u8 nr; |
| 333 | } interrupt; | 335 | } interrupt; |
| 334 | 336 | ||
| 335 | struct { | 337 | struct { |
| 336 | int active; | 338 | int vm86_active; |
| 337 | u8 save_iopl; | 339 | u8 save_iopl; |
| 338 | struct kvm_save_segment { | 340 | struct kvm_save_segment { |
| 339 | u16 selector; | 341 | u16 selector; |
| @@ -356,9 +358,9 @@ struct kvm_vcpu_arch { | |||
| 356 | unsigned int time_offset; | 358 | unsigned int time_offset; |
| 357 | struct page *time_page; | 359 | struct page *time_page; |
| 358 | 360 | ||
| 361 | bool singlestep; /* guest is single stepped by KVM */ | ||
| 359 | bool nmi_pending; | 362 | bool nmi_pending; |
| 360 | bool nmi_injected; | 363 | bool nmi_injected; |
| 361 | bool nmi_window_open; | ||
| 362 | 364 | ||
| 363 | struct mtrr_state_type mtrr_state; | 365 | struct mtrr_state_type mtrr_state; |
| 364 | u32 pat; | 366 | u32 pat; |
| @@ -392,15 +394,14 @@ struct kvm_arch{ | |||
| 392 | */ | 394 | */ |
| 393 | struct list_head active_mmu_pages; | 395 | struct list_head active_mmu_pages; |
| 394 | struct list_head assigned_dev_head; | 396 | struct list_head assigned_dev_head; |
| 395 | struct list_head oos_global_pages; | ||
| 396 | struct iommu_domain *iommu_domain; | 397 | struct iommu_domain *iommu_domain; |
| 398 | int iommu_flags; | ||
| 397 | struct kvm_pic *vpic; | 399 | struct kvm_pic *vpic; |
| 398 | struct kvm_ioapic *vioapic; | 400 | struct kvm_ioapic *vioapic; |
| 399 | struct kvm_pit *vpit; | 401 | struct kvm_pit *vpit; |
| 400 | struct hlist_head irq_ack_notifier_list; | 402 | struct hlist_head irq_ack_notifier_list; |
| 401 | int vapics_in_nmi_mode; | 403 | int vapics_in_nmi_mode; |
| 402 | 404 | ||
| 403 | int round_robin_prev_vcpu; | ||
| 404 | unsigned int tss_addr; | 405 | unsigned int tss_addr; |
| 405 | struct page *apic_access_page; | 406 | struct page *apic_access_page; |
| 406 | 407 | ||
| @@ -423,7 +424,6 @@ struct kvm_vm_stat { | |||
| 423 | u32 mmu_recycled; | 424 | u32 mmu_recycled; |
| 424 | u32 mmu_cache_miss; | 425 | u32 mmu_cache_miss; |
| 425 | u32 mmu_unsync; | 426 | u32 mmu_unsync; |
| 426 | u32 mmu_unsync_global; | ||
| 427 | u32 remote_tlb_flush; | 427 | u32 remote_tlb_flush; |
| 428 | u32 lpages; | 428 | u32 lpages; |
| 429 | }; | 429 | }; |
| @@ -443,7 +443,6 @@ struct kvm_vcpu_stat { | |||
| 443 | u32 halt_exits; | 443 | u32 halt_exits; |
| 444 | u32 halt_wakeup; | 444 | u32 halt_wakeup; |
| 445 | u32 request_irq_exits; | 445 | u32 request_irq_exits; |
| 446 | u32 request_nmi_exits; | ||
| 447 | u32 irq_exits; | 446 | u32 irq_exits; |
| 448 | u32 host_state_reload; | 447 | u32 host_state_reload; |
| 449 | u32 efer_reload; | 448 | u32 efer_reload; |
| @@ -511,20 +510,22 @@ struct kvm_x86_ops { | |||
| 511 | void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); | 510 | void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); |
| 512 | int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); | 511 | int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); |
| 513 | void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); | 512 | void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); |
| 513 | void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); | ||
| 514 | u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); | ||
| 514 | void (*patch_hypercall)(struct kvm_vcpu *vcpu, | 515 | void (*patch_hypercall)(struct kvm_vcpu *vcpu, |
| 515 | unsigned char *hypercall_addr); | 516 | unsigned char *hypercall_addr); |
| 516 | int (*get_irq)(struct kvm_vcpu *vcpu); | 517 | void (*set_irq)(struct kvm_vcpu *vcpu); |
| 517 | void (*set_irq)(struct kvm_vcpu *vcpu, int vec); | 518 | void (*set_nmi)(struct kvm_vcpu *vcpu); |
| 518 | void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, | 519 | void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, |
| 519 | bool has_error_code, u32 error_code); | 520 | bool has_error_code, u32 error_code); |
| 520 | bool (*exception_injected)(struct kvm_vcpu *vcpu); | 521 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu); |
| 521 | void (*inject_pending_irq)(struct kvm_vcpu *vcpu); | 522 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); |
| 522 | void (*inject_pending_vectors)(struct kvm_vcpu *vcpu, | 523 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); |
| 523 | struct kvm_run *run); | 524 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); |
| 524 | 525 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); | |
| 525 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); | 526 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
| 526 | int (*get_tdp_level)(void); | 527 | int (*get_tdp_level)(void); |
| 527 | int (*get_mt_mask_shift)(void); | 528 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
| 528 | }; | 529 | }; |
| 529 | 530 | ||
| 530 | extern struct kvm_x86_ops *kvm_x86_ops; | 531 | extern struct kvm_x86_ops *kvm_x86_ops; |
| @@ -538,7 +539,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu); | |||
| 538 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); | 539 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); |
| 539 | void kvm_mmu_set_base_ptes(u64 base_pte); | 540 | void kvm_mmu_set_base_ptes(u64 base_pte); |
| 540 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 541 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
| 541 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask); | 542 | u64 dirty_mask, u64 nx_mask, u64 x_mask); |
| 542 | 543 | ||
| 543 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); | 544 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); |
| 544 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); | 545 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); |
| @@ -552,6 +553,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 552 | const void *val, int bytes); | 553 | const void *val, int bytes); |
| 553 | int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, | 554 | int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, |
| 554 | gpa_t addr, unsigned long *ret); | 555 | gpa_t addr, unsigned long *ret); |
| 556 | u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); | ||
| 555 | 557 | ||
| 556 | extern bool tdp_enabled; | 558 | extern bool tdp_enabled; |
| 557 | 559 | ||
| @@ -563,6 +565,7 @@ enum emulation_result { | |||
| 563 | 565 | ||
| 564 | #define EMULTYPE_NO_DECODE (1 << 0) | 566 | #define EMULTYPE_NO_DECODE (1 << 0) |
| 565 | #define EMULTYPE_TRAP_UD (1 << 1) | 567 | #define EMULTYPE_TRAP_UD (1 << 1) |
| 568 | #define EMULTYPE_SKIP (1 << 2) | ||
| 566 | int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, | 569 | int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, |
| 567 | unsigned long cr2, u16 error_code, int emulation_type); | 570 | unsigned long cr2, u16 error_code, int emulation_type); |
| 568 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); | 571 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); |
| @@ -638,7 +641,6 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); | |||
| 638 | int kvm_mmu_load(struct kvm_vcpu *vcpu); | 641 | int kvm_mmu_load(struct kvm_vcpu *vcpu); |
| 639 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); | 642 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); |
| 640 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); | 643 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); |
| 641 | void kvm_mmu_sync_global(struct kvm_vcpu *vcpu); | ||
| 642 | 644 | ||
| 643 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); | 645 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); |
| 644 | 646 | ||
| @@ -769,6 +771,8 @@ enum { | |||
| 769 | #define HF_GIF_MASK (1 << 0) | 771 | #define HF_GIF_MASK (1 << 0) |
| 770 | #define HF_HIF_MASK (1 << 1) | 772 | #define HF_HIF_MASK (1 << 1) |
| 771 | #define HF_VINTR_MASK (1 << 2) | 773 | #define HF_VINTR_MASK (1 << 2) |
| 774 | #define HF_NMI_MASK (1 << 3) | ||
| 775 | #define HF_IRET_MASK (1 << 4) | ||
| 772 | 776 | ||
| 773 | /* | 777 | /* |
| 774 | * Hardware virtualization extension instructions may fault if a | 778 | * Hardware virtualization extension instructions may fault if a |
| @@ -791,5 +795,6 @@ asmlinkage void kvm_handle_fault_on_reboot(void); | |||
| 791 | #define KVM_ARCH_WANT_MMU_NOTIFIER | 795 | #define KVM_ARCH_WANT_MMU_NOTIFIER |
| 792 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); | 796 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); |
| 793 | int kvm_age_hva(struct kvm *kvm, unsigned long hva); | 797 | int kvm_age_hva(struct kvm *kvm, unsigned long hva); |
| 798 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); | ||
| 794 | 799 | ||
| 795 | #endif /* _ASM_X86_KVM_HOST_H */ | 800 | #endif /* _ASM_X86_KVM_HOST_H */ |
diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h index 6a159732881a..b7ed2c423116 100644 --- a/arch/x86/include/asm/kvm_x86_emulate.h +++ b/arch/x86/include/asm/kvm_x86_emulate.h | |||
| @@ -143,6 +143,9 @@ struct decode_cache { | |||
| 143 | struct fetch_cache fetch; | 143 | struct fetch_cache fetch; |
| 144 | }; | 144 | }; |
| 145 | 145 | ||
| 146 | #define X86_SHADOW_INT_MOV_SS 1 | ||
| 147 | #define X86_SHADOW_INT_STI 2 | ||
| 148 | |||
| 146 | struct x86_emulate_ctxt { | 149 | struct x86_emulate_ctxt { |
| 147 | /* Register state before/after emulation. */ | 150 | /* Register state before/after emulation. */ |
| 148 | struct kvm_vcpu *vcpu; | 151 | struct kvm_vcpu *vcpu; |
| @@ -152,6 +155,9 @@ struct x86_emulate_ctxt { | |||
| 152 | int mode; | 155 | int mode; |
| 153 | u32 cs_base; | 156 | u32 cs_base; |
| 154 | 157 | ||
| 158 | /* interruptibility state, as a result of execution of STI or MOV SS */ | ||
| 159 | int interruptibility; | ||
| 160 | |||
| 155 | /* decode cache */ | 161 | /* decode cache */ |
| 156 | struct decode_cache decode; | 162 | struct decode_cache decode; |
| 157 | }; | 163 | }; |
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 82ada75f3ebf..85574b7c1bc1 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h | |||
| @@ -225,6 +225,7 @@ struct __attribute__ ((__packed__)) vmcb { | |||
| 225 | #define SVM_EVTINJ_VALID_ERR (1 << 11) | 225 | #define SVM_EVTINJ_VALID_ERR (1 << 11) |
| 226 | 226 | ||
| 227 | #define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK | 227 | #define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK |
| 228 | #define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK | ||
| 228 | 229 | ||
| 229 | #define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR | 230 | #define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR |
| 230 | #define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI | 231 | #define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 498f944010b9..11be5ad2e0e9 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
| @@ -247,6 +247,7 @@ enum vmcs_field { | |||
| 247 | #define EXIT_REASON_MSR_READ 31 | 247 | #define EXIT_REASON_MSR_READ 31 |
| 248 | #define EXIT_REASON_MSR_WRITE 32 | 248 | #define EXIT_REASON_MSR_WRITE 32 |
| 249 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 | 249 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 |
| 250 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 | ||
| 250 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 | 251 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 |
| 251 | #define EXIT_REASON_APIC_ACCESS 44 | 252 | #define EXIT_REASON_APIC_ACCESS 44 |
| 252 | #define EXIT_REASON_EPT_VIOLATION 48 | 253 | #define EXIT_REASON_EPT_VIOLATION 48 |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 09dd1d414fc3..289cc4815028 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c | |||
| @@ -420,6 +420,7 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
| 420 | out2: | 420 | out2: |
| 421 | atomic_dec(&mce_entry); | 421 | atomic_dec(&mce_entry); |
| 422 | } | 422 | } |
| 423 | EXPORT_SYMBOL_GPL(do_machine_check); | ||
| 423 | 424 | ||
| 424 | #ifdef CONFIG_X86_MCE_INTEL | 425 | #ifdef CONFIG_X86_MCE_INTEL |
| 425 | /*** | 426 | /*** |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 6551dedee20c..a78ecad0c900 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include <linux/mm.h> | 27 | #include <linux/mm.h> |
| 28 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
| 29 | #include <linux/hardirq.h> | 29 | #include <linux/hardirq.h> |
| 30 | #include <asm/timer.h> | ||
| 30 | 31 | ||
| 31 | #define MMU_QUEUE_SIZE 1024 | 32 | #define MMU_QUEUE_SIZE 1024 |
| 32 | 33 | ||
| @@ -230,6 +231,9 @@ static void paravirt_ops_setup(void) | |||
| 230 | pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; | 231 | pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; |
| 231 | pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; | 232 | pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; |
| 232 | } | 233 | } |
| 234 | #ifdef CONFIG_X86_IO_APIC | ||
| 235 | no_timer_check = 1; | ||
| 236 | #endif | ||
| 233 | } | 237 | } |
| 234 | 238 | ||
| 235 | void __init kvm_guest_init(void) | 239 | void __init kvm_guest_init(void) |
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index f6db48c405b8..28f5fb495a66 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
| @@ -172,6 +172,9 @@ void smp_reschedule_interrupt(struct pt_regs *regs) | |||
| 172 | { | 172 | { |
| 173 | ack_APIC_irq(); | 173 | ack_APIC_irq(); |
| 174 | inc_irq_stat(irq_resched_count); | 174 | inc_irq_stat(irq_resched_count); |
| 175 | /* | ||
| 176 | * KVM uses this interrupt to force a cpu out of guest mode | ||
| 177 | */ | ||
| 175 | } | 178 | } |
| 176 | 179 | ||
| 177 | void smp_call_function_interrupt(struct pt_regs *regs) | 180 | void smp_call_function_interrupt(struct pt_regs *regs) |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index a58504ea78cc..8600a09e0c6c 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
| @@ -50,6 +50,9 @@ config KVM_INTEL | |||
| 50 | Provides support for KVM on Intel processors equipped with the VT | 50 | Provides support for KVM on Intel processors equipped with the VT |
| 51 | extensions. | 51 | extensions. |
| 52 | 52 | ||
| 53 | To compile this as a module, choose M here: the module | ||
| 54 | will be called kvm-intel. | ||
| 55 | |||
| 53 | config KVM_AMD | 56 | config KVM_AMD |
| 54 | tristate "KVM for AMD processors support" | 57 | tristate "KVM for AMD processors support" |
| 55 | depends on KVM | 58 | depends on KVM |
| @@ -57,6 +60,9 @@ config KVM_AMD | |||
| 57 | Provides support for KVM on AMD processors equipped with the AMD-V | 60 | Provides support for KVM on AMD processors equipped with the AMD-V |
| 58 | (SVM) extensions. | 61 | (SVM) extensions. |
| 59 | 62 | ||
| 63 | To compile this as a module, choose M here: the module | ||
| 64 | will be called kvm-amd. | ||
| 65 | |||
| 60 | config KVM_TRACE | 66 | config KVM_TRACE |
| 61 | bool "KVM trace support" | 67 | bool "KVM trace support" |
| 62 | depends on KVM && SYSFS | 68 | depends on KVM && SYSFS |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index d3ec292f00f2..b43c4efafe80 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
| @@ -14,7 +14,7 @@ endif | |||
| 14 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm | 14 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm |
| 15 | 15 | ||
| 16 | kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ | 16 | kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ |
| 17 | i8254.o | 17 | i8254.o timer.o |
| 18 | obj-$(CONFIG_KVM) += kvm.o | 18 | obj-$(CONFIG_KVM) += kvm.o |
| 19 | kvm-intel-objs = vmx.o | 19 | kvm-intel-objs = vmx.o |
| 20 | obj-$(CONFIG_KVM_INTEL) += kvm-intel.o | 20 | obj-$(CONFIG_KVM_INTEL) += kvm-intel.o |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index c13bb92d3157..4d6f0d293ee2 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
| @@ -98,6 +98,37 @@ static int pit_get_gate(struct kvm *kvm, int channel) | |||
| 98 | return kvm->arch.vpit->pit_state.channels[channel].gate; | 98 | return kvm->arch.vpit->pit_state.channels[channel].gate; |
| 99 | } | 99 | } |
| 100 | 100 | ||
| 101 | static s64 __kpit_elapsed(struct kvm *kvm) | ||
| 102 | { | ||
| 103 | s64 elapsed; | ||
| 104 | ktime_t remaining; | ||
| 105 | struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; | ||
| 106 | |||
| 107 | /* | ||
| 108 | * The Counter does not stop when it reaches zero. In | ||
| 109 | * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to | ||
| 110 | * the highest count, either FFFF hex for binary counting | ||
| 111 | * or 9999 for BCD counting, and continues counting. | ||
| 112 | * Modes 2 and 3 are periodic; the Counter reloads | ||
| 113 | * itself with the initial count and continues counting | ||
| 114 | * from there. | ||
| 115 | */ | ||
| 116 | remaining = hrtimer_expires_remaining(&ps->pit_timer.timer); | ||
| 117 | elapsed = ps->pit_timer.period - ktime_to_ns(remaining); | ||
| 118 | elapsed = mod_64(elapsed, ps->pit_timer.period); | ||
| 119 | |||
| 120 | return elapsed; | ||
| 121 | } | ||
| 122 | |||
| 123 | static s64 kpit_elapsed(struct kvm *kvm, struct kvm_kpit_channel_state *c, | ||
| 124 | int channel) | ||
| 125 | { | ||
| 126 | if (channel == 0) | ||
| 127 | return __kpit_elapsed(kvm); | ||
| 128 | |||
| 129 | return ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); | ||
| 130 | } | ||
| 131 | |||
| 101 | static int pit_get_count(struct kvm *kvm, int channel) | 132 | static int pit_get_count(struct kvm *kvm, int channel) |
| 102 | { | 133 | { |
| 103 | struct kvm_kpit_channel_state *c = | 134 | struct kvm_kpit_channel_state *c = |
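A worked example of the channel-0 arithmetic in the new __kpit_elapsed() above, with made-up numbers: if the programmed reload period is 10 ms and the emulated hrtimer will next fire in 3 ms, then

    elapsed = period - remaining        = 10 ms - 3 ms = 7 ms
    elapsed = mod_64(elapsed, period)   = 7 ms

so 7 ms of the current count-down has passed. The mod_64() keeps the result in range in the window where the timer has already fired but has not been re-armed yet, i.e. when "remaining" can briefly be zero or negative.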
| @@ -107,7 +138,7 @@ static int pit_get_count(struct kvm *kvm, int channel) | |||
| 107 | 138 | ||
| 108 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); | 139 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); |
| 109 | 140 | ||
| 110 | t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); | 141 | t = kpit_elapsed(kvm, c, channel); |
| 111 | d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); | 142 | d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); |
| 112 | 143 | ||
| 113 | switch (c->mode) { | 144 | switch (c->mode) { |
| @@ -137,7 +168,7 @@ static int pit_get_out(struct kvm *kvm, int channel) | |||
| 137 | 168 | ||
| 138 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); | 169 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); |
| 139 | 170 | ||
| 140 | t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); | 171 | t = kpit_elapsed(kvm, c, channel); |
| 141 | d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); | 172 | d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); |
| 142 | 173 | ||
| 143 | switch (c->mode) { | 174 | switch (c->mode) { |
| @@ -193,28 +224,6 @@ static void pit_latch_status(struct kvm *kvm, int channel) | |||
| 193 | } | 224 | } |
| 194 | } | 225 | } |
| 195 | 226 | ||
| 196 | static int __pit_timer_fn(struct kvm_kpit_state *ps) | ||
| 197 | { | ||
| 198 | struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0]; | ||
| 199 | struct kvm_kpit_timer *pt = &ps->pit_timer; | ||
| 200 | |||
| 201 | if (!atomic_inc_and_test(&pt->pending)) | ||
| 202 | set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); | ||
| 203 | |||
| 204 | if (!pt->reinject) | ||
| 205 | atomic_set(&pt->pending, 1); | ||
| 206 | |||
| 207 | if (vcpu0 && waitqueue_active(&vcpu0->wq)) | ||
| 208 | wake_up_interruptible(&vcpu0->wq); | ||
| 209 | |||
| 210 | hrtimer_add_expires_ns(&pt->timer, pt->period); | ||
| 211 | pt->scheduled = hrtimer_get_expires_ns(&pt->timer); | ||
| 212 | if (pt->period) | ||
| 213 | ps->channels[0].count_load_time = ktime_get(); | ||
| 214 | |||
| 215 | return (pt->period == 0 ? 0 : 1); | ||
| 216 | } | ||
| 217 | |||
| 218 | int pit_has_pending_timer(struct kvm_vcpu *vcpu) | 227 | int pit_has_pending_timer(struct kvm_vcpu *vcpu) |
| 219 | { | 228 | { |
| 220 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; | 229 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; |
| @@ -235,21 +244,6 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
| 235 | spin_unlock(&ps->inject_lock); | 244 | spin_unlock(&ps->inject_lock); |
| 236 | } | 245 | } |
| 237 | 246 | ||
| 238 | static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) | ||
| 239 | { | ||
| 240 | struct kvm_kpit_state *ps; | ||
| 241 | int restart_timer = 0; | ||
| 242 | |||
| 243 | ps = container_of(data, struct kvm_kpit_state, pit_timer.timer); | ||
| 244 | |||
| 245 | restart_timer = __pit_timer_fn(ps); | ||
| 246 | |||
| 247 | if (restart_timer) | ||
| 248 | return HRTIMER_RESTART; | ||
| 249 | else | ||
| 250 | return HRTIMER_NORESTART; | ||
| 251 | } | ||
| 252 | |||
| 253 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | 247 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) |
| 254 | { | 248 | { |
| 255 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; | 249 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; |
| @@ -263,15 +257,26 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | |||
| 263 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | 257 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); |
| 264 | } | 258 | } |
| 265 | 259 | ||
| 266 | static void destroy_pit_timer(struct kvm_kpit_timer *pt) | 260 | static void destroy_pit_timer(struct kvm_timer *pt) |
| 267 | { | 261 | { |
| 268 | pr_debug("pit: execute del timer!\n"); | 262 | pr_debug("pit: execute del timer!\n"); |
| 269 | hrtimer_cancel(&pt->timer); | 263 | hrtimer_cancel(&pt->timer); |
| 270 | } | 264 | } |
| 271 | 265 | ||
| 266 | static bool kpit_is_periodic(struct kvm_timer *ktimer) | ||
| 267 | { | ||
| 268 | struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state, | ||
| 269 | pit_timer); | ||
| 270 | return ps->is_periodic; | ||
| 271 | } | ||
| 272 | |||
| 273 | static struct kvm_timer_ops kpit_ops = { | ||
| 274 | .is_periodic = kpit_is_periodic, | ||
| 275 | }; | ||
| 276 | |||
| 272 | static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) | 277 | static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) |
| 273 | { | 278 | { |
| 274 | struct kvm_kpit_timer *pt = &ps->pit_timer; | 279 | struct kvm_timer *pt = &ps->pit_timer; |
| 275 | s64 interval; | 280 | s64 interval; |
| 276 | 281 | ||
| 277 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); | 282 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); |
| @@ -280,8 +285,14 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) | |||
| 280 | 285 | ||
| 281 | /* TODO The new value only takes effect after the timer is retriggered */ | 286 | /* TODO The new value only takes effect after the timer is retriggered */ |
| 282 | hrtimer_cancel(&pt->timer); | 287 | hrtimer_cancel(&pt->timer); |
| 283 | pt->period = (is_period == 0) ? 0 : interval; | 288 | pt->period = interval; |
| 284 | pt->timer.function = pit_timer_fn; | 289 | ps->is_periodic = is_period; |
| 290 | |||
| 291 | pt->timer.function = kvm_timer_fn; | ||
| 292 | pt->t_ops = &kpit_ops; | ||
| 293 | pt->kvm = ps->pit->kvm; | ||
| 294 | pt->vcpu_id = 0; | ||
| 295 | |||
| 285 | atomic_set(&pt->pending, 0); | 296 | atomic_set(&pt->pending, 0); |
| 286 | ps->irq_ack = 1; | 297 | ps->irq_ack = 1; |
| 287 | 298 | ||
| @@ -298,23 +309,23 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) | |||
| 298 | pr_debug("pit: load_count val is %d, channel is %d\n", val, channel); | 309 | pr_debug("pit: load_count val is %d, channel is %d\n", val, channel); |
| 299 | 310 | ||
| 300 | /* | 311 | /* |
| 301 | * Though spec said the state of 8254 is undefined after power-up, | 312 | * The largest possible initial count is 0; this is equivalent |
| 302 | * seems some tricky OS like Windows XP depends on IRQ0 interrupt | 313 | * to 216 for binary counting and 104 for BCD counting. |
| 303 | * when booting up. | ||
| 304 | * So here setting initialize rate for it, and not a specific number | ||
| 305 | */ | 314 | */ |
| 306 | if (val == 0) | 315 | if (val == 0) |
| 307 | val = 0x10000; | 316 | val = 0x10000; |
| 308 | 317 | ||
| 309 | ps->channels[channel].count_load_time = ktime_get(); | ||
| 310 | ps->channels[channel].count = val; | 318 | ps->channels[channel].count = val; |
| 311 | 319 | ||
| 312 | if (channel != 0) | 320 | if (channel != 0) { |
| 321 | ps->channels[channel].count_load_time = ktime_get(); | ||
| 313 | return; | 322 | return; |
| 323 | } | ||
| 314 | 324 | ||
| 315 | /* Two types of timer | 325 | /* Two types of timer |
| 316 | * mode 1 is one shot, mode 2 is period, otherwise del timer */ | 326 | * mode 1 is one shot, mode 2 is period, otherwise del timer */ |
| 317 | switch (ps->channels[0].mode) { | 327 | switch (ps->channels[0].mode) { |
| 328 | case 0: | ||
| 318 | case 1: | 329 | case 1: |
| 319 | /* FIXME: enhance mode 4 precision */ | 330 | /* FIXME: enhance mode 4 precision */ |
| 320 | case 4: | 331 | case 4: |
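The remainder of this switch lies outside the hunk. Based on the comment above ("mode 1 is one shot, mode 2 is period, otherwise del timer"), the mode-to-timer mapping is roughly the following sketch, using the same helpers as the patch:

    switch (ps->channels[0].mode) {
    case 0:         /* interrupt on terminal count      */
    case 1:         /* hardware re-triggerable one-shot */
    case 4:         /* software triggered strobe        */
            create_pit_timer(ps, val, 0);   /* one-shot  */
            break;
    case 2:         /* rate generator */
    case 3:         /* square wave    */
            create_pit_timer(ps, val, 1);   /* periodic  */
            break;
    default:
            destroy_pit_timer(&ps->pit_timer);
    }

The visible change is that mode 0 is now grouped with the one-shot modes instead of falling through to timer destruction.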
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 6acbe4b505d5..bbd863ff60b7 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h | |||
| @@ -3,15 +3,6 @@ | |||
| 3 | 3 | ||
| 4 | #include "iodev.h" | 4 | #include "iodev.h" |
| 5 | 5 | ||
| 6 | struct kvm_kpit_timer { | ||
| 7 | struct hrtimer timer; | ||
| 8 | int irq; | ||
| 9 | s64 period; /* unit: ns */ | ||
| 10 | s64 scheduled; | ||
| 11 | atomic_t pending; | ||
| 12 | bool reinject; | ||
| 13 | }; | ||
| 14 | |||
| 15 | struct kvm_kpit_channel_state { | 6 | struct kvm_kpit_channel_state { |
| 16 | u32 count; /* can be 65536 */ | 7 | u32 count; /* can be 65536 */ |
| 17 | u16 latched_count; | 8 | u16 latched_count; |
| @@ -30,7 +21,8 @@ struct kvm_kpit_channel_state { | |||
| 30 | 21 | ||
| 31 | struct kvm_kpit_state { | 22 | struct kvm_kpit_state { |
| 32 | struct kvm_kpit_channel_state channels[3]; | 23 | struct kvm_kpit_channel_state channels[3]; |
| 33 | struct kvm_kpit_timer pit_timer; | 24 | struct kvm_timer pit_timer; |
| 25 | bool is_periodic; | ||
| 34 | u32 speaker_data_on; | 26 | u32 speaker_data_on; |
| 35 | struct mutex lock; | 27 | struct mutex lock; |
| 36 | struct kvm_pit *pit; | 28 | struct kvm_pit *pit; |
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index cf17ed52f6fb..96dfbb6ad2a9 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | 24 | ||
| 25 | #include "irq.h" | 25 | #include "irq.h" |
| 26 | #include "i8254.h" | 26 | #include "i8254.h" |
| 27 | #include "x86.h" | ||
| 27 | 28 | ||
| 28 | /* | 29 | /* |
| 29 | * check if there are pending timer events | 30 | * check if there are pending timer events |
| @@ -48,6 +49,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) | |||
| 48 | { | 49 | { |
| 49 | struct kvm_pic *s; | 50 | struct kvm_pic *s; |
| 50 | 51 | ||
| 52 | if (!irqchip_in_kernel(v->kvm)) | ||
| 53 | return v->arch.interrupt.pending; | ||
| 54 | |||
| 51 | if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ | 55 | if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ |
| 52 | if (kvm_apic_accept_pic_intr(v)) { | 56 | if (kvm_apic_accept_pic_intr(v)) { |
| 53 | s = pic_irqchip(v->kvm); /* PIC */ | 57 | s = pic_irqchip(v->kvm); /* PIC */ |
| @@ -67,6 +71,9 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) | |||
| 67 | struct kvm_pic *s; | 71 | struct kvm_pic *s; |
| 68 | int vector; | 72 | int vector; |
| 69 | 73 | ||
| 74 | if (!irqchip_in_kernel(v->kvm)) | ||
| 75 | return v->arch.interrupt.nr; | ||
| 76 | |||
| 70 | vector = kvm_get_apic_interrupt(v); /* APIC */ | 77 | vector = kvm_get_apic_interrupt(v); /* APIC */ |
| 71 | if (vector == -1) { | 78 | if (vector == -1) { |
| 72 | if (kvm_apic_accept_pic_intr(v)) { | 79 | if (kvm_apic_accept_pic_intr(v)) { |
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h new file mode 100644 index 000000000000..26bd6ba74e1c --- /dev/null +++ b/arch/x86/kvm/kvm_timer.h | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | |||
| 2 | struct kvm_timer { | ||
| 3 | struct hrtimer timer; | ||
| 4 | s64 period; /* unit: ns */ | ||
| 5 | atomic_t pending; /* accumulated triggered timers */ | ||
| 6 | bool reinject; | ||
| 7 | struct kvm_timer_ops *t_ops; | ||
| 8 | struct kvm *kvm; | ||
| 9 | int vcpu_id; | ||
| 10 | }; | ||
| 11 | |||
| 12 | struct kvm_timer_ops { | ||
| 13 | bool (*is_periodic)(struct kvm_timer *); | ||
| 14 | }; | ||
| 15 | |||
| 16 | |||
| 17 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); | ||
| 18 | |||
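The implementation of kvm_timer_fn() lives in the new timer.o added to the Makefile and is not shown in this series of hunks. A plausible sketch of how the structure and ops above are meant to be consumed (the pending-tick bookkeeping and wakeup details are assumptions, not taken from the patch):

    enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
    {
            struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
            struct kvm_vcpu *vcpu = ktimer->kvm->vcpus[ktimer->vcpu_id];

            if (!vcpu)
                    return HRTIMER_NORESTART;

            /* accumulate a tick and kick the vcpu out of halt (assumed;
             * reinject handling omitted) */
            atomic_inc(&ktimer->pending);
            if (waitqueue_active(&vcpu->wq))
                    wake_up_interruptible(&vcpu->wq);

            if (ktimer->t_ops->is_periodic(ktimer)) {
                    hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
                    return HRTIMER_RESTART;
            }
            return HRTIMER_NORESTART;
    }

The point of the ->is_periodic() hook is that the PIT and the LAPIC timer can share one hrtimer callback while each decides for itself whether the timer re-arms.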
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index f0b67f2cdd69..ae99d83f81a3 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
| @@ -196,20 +196,15 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) | |||
| 196 | } | 196 | } |
| 197 | EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); | 197 | EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); |
| 198 | 198 | ||
| 199 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig) | 199 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, |
| 200 | int vector, int level, int trig_mode); | ||
| 201 | |||
| 202 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) | ||
| 200 | { | 203 | { |
| 201 | struct kvm_lapic *apic = vcpu->arch.apic; | 204 | struct kvm_lapic *apic = vcpu->arch.apic; |
| 202 | 205 | ||
| 203 | if (!apic_test_and_set_irr(vec, apic)) { | 206 | return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, |
| 204 | /* a new pending irq is set in IRR */ | 207 | irq->level, irq->trig_mode); |
| 205 | if (trig) | ||
| 206 | apic_set_vector(vec, apic->regs + APIC_TMR); | ||
| 207 | else | ||
| 208 | apic_clear_vector(vec, apic->regs + APIC_TMR); | ||
| 209 | kvm_vcpu_kick(apic->vcpu); | ||
| 210 | return 1; | ||
| 211 | } | ||
| 212 | return 0; | ||
| 213 | } | 208 | } |
| 214 | 209 | ||
| 215 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) | 210 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) |
| @@ -250,7 +245,7 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) | |||
| 250 | 245 | ||
| 251 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) | 246 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) |
| 252 | { | 247 | { |
| 253 | return kvm_apic_id(apic) == dest; | 248 | return dest == 0xff || kvm_apic_id(apic) == dest; |
| 254 | } | 249 | } |
| 255 | 250 | ||
| 256 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) | 251 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) |
| @@ -279,37 +274,34 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) | |||
| 279 | return result; | 274 | return result; |
| 280 | } | 275 | } |
| 281 | 276 | ||
| 282 | static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | 277 | int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, |
| 283 | int short_hand, int dest, int dest_mode) | 278 | int short_hand, int dest, int dest_mode) |
| 284 | { | 279 | { |
| 285 | int result = 0; | 280 | int result = 0; |
| 286 | struct kvm_lapic *target = vcpu->arch.apic; | 281 | struct kvm_lapic *target = vcpu->arch.apic; |
| 287 | 282 | ||
| 288 | apic_debug("target %p, source %p, dest 0x%x, " | 283 | apic_debug("target %p, source %p, dest 0x%x, " |
| 289 | "dest_mode 0x%x, short_hand 0x%x", | 284 | "dest_mode 0x%x, short_hand 0x%x\n", |
| 290 | target, source, dest, dest_mode, short_hand); | 285 | target, source, dest, dest_mode, short_hand); |
| 291 | 286 | ||
| 292 | ASSERT(!target); | 287 | ASSERT(!target); |
| 293 | switch (short_hand) { | 288 | switch (short_hand) { |
| 294 | case APIC_DEST_NOSHORT: | 289 | case APIC_DEST_NOSHORT: |
| 295 | if (dest_mode == 0) { | 290 | if (dest_mode == 0) |
| 296 | /* Physical mode. */ | 291 | /* Physical mode. */ |
| 297 | if ((dest == 0xFF) || (dest == kvm_apic_id(target))) | 292 | result = kvm_apic_match_physical_addr(target, dest); |
| 298 | result = 1; | 293 | else |
| 299 | } else | ||
| 300 | /* Logical mode. */ | 294 | /* Logical mode. */ |
| 301 | result = kvm_apic_match_logical_addr(target, dest); | 295 | result = kvm_apic_match_logical_addr(target, dest); |
| 302 | break; | 296 | break; |
| 303 | case APIC_DEST_SELF: | 297 | case APIC_DEST_SELF: |
| 304 | if (target == source) | 298 | result = (target == source); |
| 305 | result = 1; | ||
| 306 | break; | 299 | break; |
| 307 | case APIC_DEST_ALLINC: | 300 | case APIC_DEST_ALLINC: |
| 308 | result = 1; | 301 | result = 1; |
| 309 | break; | 302 | break; |
| 310 | case APIC_DEST_ALLBUT: | 303 | case APIC_DEST_ALLBUT: |
| 311 | if (target != source) | 304 | result = (target != source); |
| 312 | result = 1; | ||
| 313 | break; | 305 | break; |
| 314 | default: | 306 | default: |
| 315 | printk(KERN_WARNING "Bad dest shorthand value %x\n", | 307 | printk(KERN_WARNING "Bad dest shorthand value %x\n", |
| @@ -327,20 +319,22 @@ static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | |||
| 327 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | 319 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, |
| 328 | int vector, int level, int trig_mode) | 320 | int vector, int level, int trig_mode) |
| 329 | { | 321 | { |
| 330 | int orig_irr, result = 0; | 322 | int result = 0; |
| 331 | struct kvm_vcpu *vcpu = apic->vcpu; | 323 | struct kvm_vcpu *vcpu = apic->vcpu; |
| 332 | 324 | ||
| 333 | switch (delivery_mode) { | 325 | switch (delivery_mode) { |
| 334 | case APIC_DM_FIXED: | ||
| 335 | case APIC_DM_LOWEST: | 326 | case APIC_DM_LOWEST: |
| 327 | vcpu->arch.apic_arb_prio++; | ||
| 328 | case APIC_DM_FIXED: | ||
| 336 | /* FIXME add logic for vcpu on reset */ | 329 | /* FIXME add logic for vcpu on reset */ |
| 337 | if (unlikely(!apic_enabled(apic))) | 330 | if (unlikely(!apic_enabled(apic))) |
| 338 | break; | 331 | break; |
| 339 | 332 | ||
| 340 | orig_irr = apic_test_and_set_irr(vector, apic); | 333 | result = !apic_test_and_set_irr(vector, apic); |
| 341 | if (orig_irr && trig_mode) { | 334 | if (!result) { |
| 342 | apic_debug("level trig mode repeatedly for vector %d", | 335 | if (trig_mode) |
| 343 | vector); | 336 | apic_debug("level trig mode repeatedly for " |
| 337 | "vector %d", vector); | ||
| 344 | break; | 338 | break; |
| 345 | } | 339 | } |
| 346 | 340 | ||
| @@ -349,10 +343,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
| 349 | apic_set_vector(vector, apic->regs + APIC_TMR); | 343 | apic_set_vector(vector, apic->regs + APIC_TMR); |
| 350 | } else | 344 | } else |
| 351 | apic_clear_vector(vector, apic->regs + APIC_TMR); | 345 | apic_clear_vector(vector, apic->regs + APIC_TMR); |
| 352 | |||
| 353 | kvm_vcpu_kick(vcpu); | 346 | kvm_vcpu_kick(vcpu); |
| 354 | |||
| 355 | result = (orig_irr == 0); | ||
| 356 | break; | 347 | break; |
| 357 | 348 | ||
| 358 | case APIC_DM_REMRD: | 349 | case APIC_DM_REMRD: |
| @@ -364,12 +355,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
| 364 | break; | 355 | break; |
| 365 | 356 | ||
| 366 | case APIC_DM_NMI: | 357 | case APIC_DM_NMI: |
| 358 | result = 1; | ||
| 367 | kvm_inject_nmi(vcpu); | 359 | kvm_inject_nmi(vcpu); |
| 368 | kvm_vcpu_kick(vcpu); | 360 | kvm_vcpu_kick(vcpu); |
| 369 | break; | 361 | break; |
| 370 | 362 | ||
| 371 | case APIC_DM_INIT: | 363 | case APIC_DM_INIT: |
| 372 | if (level) { | 364 | if (level) { |
| 365 | result = 1; | ||
| 373 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | 366 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) |
| 374 | printk(KERN_DEBUG | 367 | printk(KERN_DEBUG |
| 375 | "INIT on a runnable vcpu %d\n", | 368 | "INIT on a runnable vcpu %d\n", |
| @@ -386,6 +379,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
| 386 | apic_debug("SIPI to vcpu %d vector 0x%02x\n", | 379 | apic_debug("SIPI to vcpu %d vector 0x%02x\n", |
| 387 | vcpu->vcpu_id, vector); | 380 | vcpu->vcpu_id, vector); |
| 388 | if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { | 381 | if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { |
| 382 | result = 1; | ||
| 389 | vcpu->arch.sipi_vector = vector; | 383 | vcpu->arch.sipi_vector = vector; |
| 390 | vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; | 384 | vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; |
| 391 | kvm_vcpu_kick(vcpu); | 385 | kvm_vcpu_kick(vcpu); |
| @@ -408,43 +402,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
| 408 | return result; | 402 | return result; |
| 409 | } | 403 | } |
| 410 | 404 | ||
| 411 | static struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector, | 405 | int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) |
| 412 | unsigned long bitmap) | ||
| 413 | { | ||
| 414 | int last; | ||
| 415 | int next; | ||
| 416 | struct kvm_lapic *apic = NULL; | ||
| 417 | |||
| 418 | last = kvm->arch.round_robin_prev_vcpu; | ||
| 419 | next = last; | ||
| 420 | |||
| 421 | do { | ||
| 422 | if (++next == KVM_MAX_VCPUS) | ||
| 423 | next = 0; | ||
| 424 | if (kvm->vcpus[next] == NULL || !test_bit(next, &bitmap)) | ||
| 425 | continue; | ||
| 426 | apic = kvm->vcpus[next]->arch.apic; | ||
| 427 | if (apic && apic_enabled(apic)) | ||
| 428 | break; | ||
| 429 | apic = NULL; | ||
| 430 | } while (next != last); | ||
| 431 | kvm->arch.round_robin_prev_vcpu = next; | ||
| 432 | |||
| 433 | if (!apic) | ||
| 434 | printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n"); | ||
| 435 | |||
| 436 | return apic; | ||
| 437 | } | ||
| 438 | |||
| 439 | struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, | ||
| 440 | unsigned long bitmap) | ||
| 441 | { | 406 | { |
| 442 | struct kvm_lapic *apic; | 407 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; |
| 443 | |||
| 444 | apic = kvm_apic_round_robin(kvm, vector, bitmap); | ||
| 445 | if (apic) | ||
| 446 | return apic->vcpu; | ||
| 447 | return NULL; | ||
| 448 | } | 408 | } |
| 449 | 409 | ||
| 450 | static void apic_set_eoi(struct kvm_lapic *apic) | 410 | static void apic_set_eoi(struct kvm_lapic *apic) |
| @@ -472,47 +432,24 @@ static void apic_send_ipi(struct kvm_lapic *apic) | |||
| 472 | { | 432 | { |
| 473 | u32 icr_low = apic_get_reg(apic, APIC_ICR); | 433 | u32 icr_low = apic_get_reg(apic, APIC_ICR); |
| 474 | u32 icr_high = apic_get_reg(apic, APIC_ICR2); | 434 | u32 icr_high = apic_get_reg(apic, APIC_ICR2); |
| 435 | struct kvm_lapic_irq irq; | ||
| 475 | 436 | ||
| 476 | unsigned int dest = GET_APIC_DEST_FIELD(icr_high); | 437 | irq.vector = icr_low & APIC_VECTOR_MASK; |
| 477 | unsigned int short_hand = icr_low & APIC_SHORT_MASK; | 438 | irq.delivery_mode = icr_low & APIC_MODE_MASK; |
| 478 | unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG; | 439 | irq.dest_mode = icr_low & APIC_DEST_MASK; |
| 479 | unsigned int level = icr_low & APIC_INT_ASSERT; | 440 | irq.level = icr_low & APIC_INT_ASSERT; |
| 480 | unsigned int dest_mode = icr_low & APIC_DEST_MASK; | 441 | irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; |
| 481 | unsigned int delivery_mode = icr_low & APIC_MODE_MASK; | 442 | irq.shorthand = icr_low & APIC_SHORT_MASK; |
| 482 | unsigned int vector = icr_low & APIC_VECTOR_MASK; | 443 | irq.dest_id = GET_APIC_DEST_FIELD(icr_high); |
| 483 | |||
| 484 | struct kvm_vcpu *target; | ||
| 485 | struct kvm_vcpu *vcpu; | ||
| 486 | unsigned long lpr_map = 0; | ||
| 487 | int i; | ||
| 488 | 444 | ||
| 489 | apic_debug("icr_high 0x%x, icr_low 0x%x, " | 445 | apic_debug("icr_high 0x%x, icr_low 0x%x, " |
| 490 | "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " | 446 | "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " |
| 491 | "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n", | 447 | "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n", |
| 492 | icr_high, icr_low, short_hand, dest, | 448 | icr_high, icr_low, irq.shorthand, irq.dest_id, |
| 493 | trig_mode, level, dest_mode, delivery_mode, vector); | 449 | irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, |
| 494 | 450 | irq.vector); | |
| 495 | for (i = 0; i < KVM_MAX_VCPUS; i++) { | ||
| 496 | vcpu = apic->vcpu->kvm->vcpus[i]; | ||
| 497 | if (!vcpu) | ||
| 498 | continue; | ||
| 499 | |||
| 500 | if (vcpu->arch.apic && | ||
| 501 | apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) { | ||
| 502 | if (delivery_mode == APIC_DM_LOWEST) | ||
| 503 | set_bit(vcpu->vcpu_id, &lpr_map); | ||
| 504 | else | ||
| 505 | __apic_accept_irq(vcpu->arch.apic, delivery_mode, | ||
| 506 | vector, level, trig_mode); | ||
| 507 | } | ||
| 508 | } | ||
| 509 | 451 | ||
| 510 | if (delivery_mode == APIC_DM_LOWEST) { | 452 | kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); |
| 511 | target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, lpr_map); | ||
| 512 | if (target != NULL) | ||
| 513 | __apic_accept_irq(target->arch.apic, delivery_mode, | ||
| 514 | vector, level, trig_mode); | ||
| 515 | } | ||
| 516 | } | 453 | } |
| 517 | 454 | ||
| 518 | static u32 apic_get_tmcct(struct kvm_lapic *apic) | 455 | static u32 apic_get_tmcct(struct kvm_lapic *apic) |
| @@ -527,12 +464,13 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic) | |||
| 527 | if (apic_get_reg(apic, APIC_TMICT) == 0) | 464 | if (apic_get_reg(apic, APIC_TMICT) == 0) |
| 528 | return 0; | 465 | return 0; |
| 529 | 466 | ||
| 530 | remaining = hrtimer_expires_remaining(&apic->timer.dev); | 467 | remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer); |
| 531 | if (ktime_to_ns(remaining) < 0) | 468 | if (ktime_to_ns(remaining) < 0) |
| 532 | remaining = ktime_set(0, 0); | 469 | remaining = ktime_set(0, 0); |
| 533 | 470 | ||
| 534 | ns = mod_64(ktime_to_ns(remaining), apic->timer.period); | 471 | ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period); |
| 535 | tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); | 472 | tmcct = div64_u64(ns, |
| 473 | (APIC_BUS_CYCLE_NS * apic->divide_count)); | ||
| 536 | 474 | ||
| 537 | return tmcct; | 475 | return tmcct; |
| 538 | } | 476 | } |
| @@ -619,25 +557,25 @@ static void update_divide_count(struct kvm_lapic *apic) | |||
| 619 | tdcr = apic_get_reg(apic, APIC_TDCR); | 557 | tdcr = apic_get_reg(apic, APIC_TDCR); |
| 620 | tmp1 = tdcr & 0xf; | 558 | tmp1 = tdcr & 0xf; |
| 621 | tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; | 559 | tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; |
| 622 | apic->timer.divide_count = 0x1 << (tmp2 & 0x7); | 560 | apic->divide_count = 0x1 << (tmp2 & 0x7); |
| 623 | 561 | ||
| 624 | apic_debug("timer divide count is 0x%x\n", | 562 | apic_debug("timer divide count is 0x%x\n", |
| 625 | apic->timer.divide_count); | 563 | apic->divide_count); |
| 626 | } | 564 | } |
| 627 | 565 | ||
| 628 | static void start_apic_timer(struct kvm_lapic *apic) | 566 | static void start_apic_timer(struct kvm_lapic *apic) |
| 629 | { | 567 | { |
| 630 | ktime_t now = apic->timer.dev.base->get_time(); | 568 | ktime_t now = apic->lapic_timer.timer.base->get_time(); |
| 631 | 569 | ||
| 632 | apic->timer.period = apic_get_reg(apic, APIC_TMICT) * | 570 | apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) * |
| 633 | APIC_BUS_CYCLE_NS * apic->timer.divide_count; | 571 | APIC_BUS_CYCLE_NS * apic->divide_count; |
| 634 | atomic_set(&apic->timer.pending, 0); | 572 | atomic_set(&apic->lapic_timer.pending, 0); |
| 635 | 573 | ||
| 636 | if (!apic->timer.period) | 574 | if (!apic->lapic_timer.period) |
| 637 | return; | 575 | return; |
| 638 | 576 | ||
| 639 | hrtimer_start(&apic->timer.dev, | 577 | hrtimer_start(&apic->lapic_timer.timer, |
| 640 | ktime_add_ns(now, apic->timer.period), | 578 | ktime_add_ns(now, apic->lapic_timer.period), |
| 641 | HRTIMER_MODE_ABS); | 579 | HRTIMER_MODE_ABS); |
| 642 | 580 | ||
| 643 | apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" | 581 | apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" |
| @@ -646,9 +584,9 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
| 646 | "expire @ 0x%016" PRIx64 ".\n", __func__, | 584 | "expire @ 0x%016" PRIx64 ".\n", __func__, |
| 647 | APIC_BUS_CYCLE_NS, ktime_to_ns(now), | 585 | APIC_BUS_CYCLE_NS, ktime_to_ns(now), |
| 648 | apic_get_reg(apic, APIC_TMICT), | 586 | apic_get_reg(apic, APIC_TMICT), |
| 649 | apic->timer.period, | 587 | apic->lapic_timer.period, |
| 650 | ktime_to_ns(ktime_add_ns(now, | 588 | ktime_to_ns(ktime_add_ns(now, |
| 651 | apic->timer.period))); | 589 | apic->lapic_timer.period))); |
| 652 | } | 590 | } |
| 653 | 591 | ||
| 654 | static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) | 592 | static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) |
| @@ -730,7 +668,7 @@ static void apic_mmio_write(struct kvm_io_device *this, | |||
| 730 | apic_set_reg(apic, APIC_LVTT + 0x10 * i, | 668 | apic_set_reg(apic, APIC_LVTT + 0x10 * i, |
| 731 | lvt_val | APIC_LVT_MASKED); | 669 | lvt_val | APIC_LVT_MASKED); |
| 732 | } | 670 | } |
| 733 | atomic_set(&apic->timer.pending, 0); | 671 | atomic_set(&apic->lapic_timer.pending, 0); |
| 734 | 672 | ||
| 735 | } | 673 | } |
| 736 | break; | 674 | break; |
| @@ -762,7 +700,7 @@ static void apic_mmio_write(struct kvm_io_device *this, | |||
| 762 | break; | 700 | break; |
| 763 | 701 | ||
| 764 | case APIC_TMICT: | 702 | case APIC_TMICT: |
| 765 | hrtimer_cancel(&apic->timer.dev); | 703 | hrtimer_cancel(&apic->lapic_timer.timer); |
| 766 | apic_set_reg(apic, APIC_TMICT, val); | 704 | apic_set_reg(apic, APIC_TMICT, val); |
| 767 | start_apic_timer(apic); | 705 | start_apic_timer(apic); |
| 768 | return; | 706 | return; |
| @@ -802,7 +740,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) | |||
| 802 | if (!vcpu->arch.apic) | 740 | if (!vcpu->arch.apic) |
| 803 | return; | 741 | return; |
| 804 | 742 | ||
| 805 | hrtimer_cancel(&vcpu->arch.apic->timer.dev); | 743 | hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer); |
| 806 | 744 | ||
| 807 | if (vcpu->arch.apic->regs_page) | 745 | if (vcpu->arch.apic->regs_page) |
| 808 | __free_page(vcpu->arch.apic->regs_page); | 746 | __free_page(vcpu->arch.apic->regs_page); |
| @@ -880,7 +818,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
| 880 | ASSERT(apic != NULL); | 818 | ASSERT(apic != NULL); |
| 881 | 819 | ||
| 882 | /* Stop the timer in case it's a reset to an active apic */ | 820 | /* Stop the timer in case it's a reset to an active apic */ |
| 883 | hrtimer_cancel(&apic->timer.dev); | 821 | hrtimer_cancel(&apic->lapic_timer.timer); |
| 884 | 822 | ||
| 885 | apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24); | 823 | apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24); |
| 886 | apic_set_reg(apic, APIC_LVR, APIC_VERSION); | 824 | apic_set_reg(apic, APIC_LVR, APIC_VERSION); |
| @@ -905,11 +843,13 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
| 905 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); | 843 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); |
| 906 | } | 844 | } |
| 907 | update_divide_count(apic); | 845 | update_divide_count(apic); |
| 908 | atomic_set(&apic->timer.pending, 0); | 846 | atomic_set(&apic->lapic_timer.pending, 0); |
| 909 | if (vcpu->vcpu_id == 0) | 847 | if (vcpu->vcpu_id == 0) |
| 910 | vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; | 848 | vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; |
| 911 | apic_update_ppr(apic); | 849 | apic_update_ppr(apic); |
| 912 | 850 | ||
| 851 | vcpu->arch.apic_arb_prio = 0; | ||
| 852 | |||
| 913 | apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" | 853 | apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" |
| 914 | "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, | 854 | "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, |
| 915 | vcpu, kvm_apic_id(apic), | 855 | vcpu, kvm_apic_id(apic), |
| @@ -917,16 +857,14 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
| 917 | } | 857 | } |
| 918 | EXPORT_SYMBOL_GPL(kvm_lapic_reset); | 858 | EXPORT_SYMBOL_GPL(kvm_lapic_reset); |
| 919 | 859 | ||
| 920 | int kvm_lapic_enabled(struct kvm_vcpu *vcpu) | 860 | bool kvm_apic_present(struct kvm_vcpu *vcpu) |
| 921 | { | 861 | { |
| 922 | struct kvm_lapic *apic = vcpu->arch.apic; | 862 | return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic); |
| 923 | int ret = 0; | 863 | } |
| 924 | |||
| 925 | if (!apic) | ||
| 926 | return 0; | ||
| 927 | ret = apic_enabled(apic); | ||
| 928 | 864 | ||
| 929 | return ret; | 865 | int kvm_lapic_enabled(struct kvm_vcpu *vcpu) |
| 866 | { | ||
| 867 | return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic); | ||
| 930 | } | 868 | } |
| 931 | EXPORT_SYMBOL_GPL(kvm_lapic_enabled); | 869 | EXPORT_SYMBOL_GPL(kvm_lapic_enabled); |
| 932 | 870 | ||
| @@ -936,22 +874,11 @@ EXPORT_SYMBOL_GPL(kvm_lapic_enabled); | |||
| 936 | *---------------------------------------------------------------------- | 874 | *---------------------------------------------------------------------- |
| 937 | */ | 875 | */ |
| 938 | 876 | ||
| 939 | /* TODO: make sure __apic_timer_fn runs in current pCPU */ | 877 | static bool lapic_is_periodic(struct kvm_timer *ktimer) |
| 940 | static int __apic_timer_fn(struct kvm_lapic *apic) | ||
| 941 | { | 878 | { |
| 942 | int result = 0; | 879 | struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, |
| 943 | wait_queue_head_t *q = &apic->vcpu->wq; | 880 | lapic_timer); |
| 944 | 881 | return apic_lvtt_period(apic); | |
| 945 | if(!atomic_inc_and_test(&apic->timer.pending)) | ||
| 946 | set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); | ||
| 947 | if (waitqueue_active(q)) | ||
| 948 | wake_up_interruptible(q); | ||
| 949 | |||
| 950 | if (apic_lvtt_period(apic)) { | ||
| 951 | result = 1; | ||
| 952 | hrtimer_add_expires_ns(&apic->timer.dev, apic->timer.period); | ||
| 953 | } | ||
| 954 | return result; | ||
| 955 | } | 882 | } |
| 956 | 883 | ||
| 957 | int apic_has_pending_timer(struct kvm_vcpu *vcpu) | 884 | int apic_has_pending_timer(struct kvm_vcpu *vcpu) |
| @@ -959,7 +886,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) | |||
| 959 | struct kvm_lapic *lapic = vcpu->arch.apic; | 886 | struct kvm_lapic *lapic = vcpu->arch.apic; |
| 960 | 887 | ||
| 961 | if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT)) | 888 | if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT)) |
| 962 | return atomic_read(&lapic->timer.pending); | 889 | return atomic_read(&lapic->lapic_timer.pending); |
| 963 | 890 | ||
| 964 | return 0; | 891 | return 0; |
| 965 | } | 892 | } |
| @@ -986,20 +913,9 @@ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) | |||
| 986 | kvm_apic_local_deliver(apic, APIC_LVT0); | 913 | kvm_apic_local_deliver(apic, APIC_LVT0); |
| 987 | } | 914 | } |
| 988 | 915 | ||
| 989 | static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) | 916 | static struct kvm_timer_ops lapic_timer_ops = { |
| 990 | { | 917 | .is_periodic = lapic_is_periodic, |
| 991 | struct kvm_lapic *apic; | 918 | }; |
| 992 | int restart_timer = 0; | ||
| 993 | |||
| 994 | apic = container_of(data, struct kvm_lapic, timer.dev); | ||
| 995 | |||
| 996 | restart_timer = __apic_timer_fn(apic); | ||
| 997 | |||
| 998 | if (restart_timer) | ||
| 999 | return HRTIMER_RESTART; | ||
| 1000 | else | ||
| 1001 | return HRTIMER_NORESTART; | ||
| 1002 | } | ||
| 1003 | 919 | ||
| 1004 | int kvm_create_lapic(struct kvm_vcpu *vcpu) | 920 | int kvm_create_lapic(struct kvm_vcpu *vcpu) |
| 1005 | { | 921 | { |
| @@ -1024,8 +940,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) | |||
| 1024 | memset(apic->regs, 0, PAGE_SIZE); | 940 | memset(apic->regs, 0, PAGE_SIZE); |
| 1025 | apic->vcpu = vcpu; | 941 | apic->vcpu = vcpu; |
| 1026 | 942 | ||
| 1027 | hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 943 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, |
| 1028 | apic->timer.dev.function = apic_timer_fn; | 944 | HRTIMER_MODE_ABS); |
| 945 | apic->lapic_timer.timer.function = kvm_timer_fn; | ||
| 946 | apic->lapic_timer.t_ops = &lapic_timer_ops; | ||
| 947 | apic->lapic_timer.kvm = vcpu->kvm; | ||
| 948 | apic->lapic_timer.vcpu_id = vcpu->vcpu_id; | ||
| 949 | |||
| 1029 | apic->base_address = APIC_DEFAULT_PHYS_BASE; | 950 | apic->base_address = APIC_DEFAULT_PHYS_BASE; |
| 1030 | vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE; | 951 | vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE; |
| 1031 | 952 | ||
| @@ -1078,9 +999,9 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) | |||
| 1078 | { | 999 | { |
| 1079 | struct kvm_lapic *apic = vcpu->arch.apic; | 1000 | struct kvm_lapic *apic = vcpu->arch.apic; |
| 1080 | 1001 | ||
| 1081 | if (apic && atomic_read(&apic->timer.pending) > 0) { | 1002 | if (apic && atomic_read(&apic->lapic_timer.pending) > 0) { |
| 1082 | if (kvm_apic_local_deliver(apic, APIC_LVTT)) | 1003 | if (kvm_apic_local_deliver(apic, APIC_LVTT)) |
| 1083 | atomic_dec(&apic->timer.pending); | 1004 | atomic_dec(&apic->lapic_timer.pending); |
| 1084 | } | 1005 | } |
| 1085 | } | 1006 | } |
| 1086 | 1007 | ||
| @@ -1106,7 +1027,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) | |||
| 1106 | MSR_IA32_APICBASE_BASE; | 1027 | MSR_IA32_APICBASE_BASE; |
| 1107 | apic_set_reg(apic, APIC_LVR, APIC_VERSION); | 1028 | apic_set_reg(apic, APIC_LVR, APIC_VERSION); |
| 1108 | apic_update_ppr(apic); | 1029 | apic_update_ppr(apic); |
| 1109 | hrtimer_cancel(&apic->timer.dev); | 1030 | hrtimer_cancel(&apic->lapic_timer.timer); |
| 1110 | update_divide_count(apic); | 1031 | update_divide_count(apic); |
| 1111 | start_apic_timer(apic); | 1032 | start_apic_timer(apic); |
| 1112 | } | 1033 | } |
| @@ -1119,7 +1040,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) | |||
| 1119 | if (!apic) | 1040 | if (!apic) |
| 1120 | return; | 1041 | return; |
| 1121 | 1042 | ||
| 1122 | timer = &apic->timer.dev; | 1043 | timer = &apic->lapic_timer.timer; |
| 1123 | if (hrtimer_cancel(timer)) | 1044 | if (hrtimer_cancel(timer)) |
| 1124 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | 1045 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); |
| 1125 | } | 1046 | } |
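The lapic.c hunks above replace the open-coded vcpu walk in apic_send_ipi() with a struct kvm_lapic_irq handed to kvm_irq_delivery_to_apic(), and replace the round-robin picker with a per-vcpu arbitration counter (vcpu->arch.apic_arb_prio) compared through the new kvm_apic_compare_prio(). kvm_irq_delivery_to_apic() itself is not part of this file's hunks; the sketch below only illustrates, as an assumption, how such a delivery path could use the comparator for lowest-priority IPIs — the function name and kvm_apic_match_dest() are hypothetical here.

    /* Sketch only: lowest-priority delivery using the new arbitration counter. */
    static int deliver_lowest_prio_sketch(struct kvm *kvm, struct kvm_lapic *src,
                                          struct kvm_lapic_irq *irq)
    {
            struct kvm_vcpu *vcpu, *lowest = NULL;
            int i, r = -1;

            for (i = 0; i < KVM_MAX_VCPUS; i++) {
                    vcpu = kvm->vcpus[i];
                    if (!vcpu || !kvm_apic_present(vcpu))
                            continue;
                    if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
                                             irq->dest_id, irq->dest_mode))
                            continue;       /* matching helper assumed, not in this hunk */
                    /* prefer the vcpu that has accepted the fewest interrupts so far */
                    if (!lowest || kvm_apic_compare_prio(vcpu, lowest) < 0)
                            lowest = vcpu;
            }
            if (lowest)
                    r = kvm_apic_set_irq(lowest, irq);  /* new signature, see lapic.h below */
            return r;
    }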
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 45ab6ee71209..a587f8349c46 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
| @@ -2,18 +2,15 @@ | |||
| 2 | #define __KVM_X86_LAPIC_H | 2 | #define __KVM_X86_LAPIC_H |
| 3 | 3 | ||
| 4 | #include "iodev.h" | 4 | #include "iodev.h" |
| 5 | #include "kvm_timer.h" | ||
| 5 | 6 | ||
| 6 | #include <linux/kvm_host.h> | 7 | #include <linux/kvm_host.h> |
| 7 | 8 | ||
| 8 | struct kvm_lapic { | 9 | struct kvm_lapic { |
| 9 | unsigned long base_address; | 10 | unsigned long base_address; |
| 10 | struct kvm_io_device dev; | 11 | struct kvm_io_device dev; |
| 11 | struct { | 12 | struct kvm_timer lapic_timer; |
| 12 | atomic_t pending; | 13 | u32 divide_count; |
| 13 | s64 period; /* unit: ns */ | ||
| 14 | u32 divide_count; | ||
| 15 | struct hrtimer dev; | ||
| 16 | } timer; | ||
| 17 | struct kvm_vcpu *vcpu; | 14 | struct kvm_vcpu *vcpu; |
| 18 | struct page *regs_page; | 15 | struct page *regs_page; |
| 19 | void *regs; | 16 | void *regs; |
| @@ -34,12 +31,13 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); | |||
| 34 | 31 | ||
| 35 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); | 32 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); |
| 36 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); | 33 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); |
| 37 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig); | 34 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); |
| 38 | 35 | ||
| 39 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); | 36 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); |
| 40 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); | 37 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); |
| 41 | void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); | 38 | void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); |
| 42 | int kvm_lapic_enabled(struct kvm_vcpu *vcpu); | 39 | int kvm_lapic_enabled(struct kvm_vcpu *vcpu); |
| 40 | bool kvm_apic_present(struct kvm_vcpu *vcpu); | ||
| 43 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); | 41 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); |
| 44 | 42 | ||
| 45 | void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); | 43 | void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); |
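lapic.h now embeds a shared struct kvm_timer (pulled in from the new "kvm_timer.h") in place of the private anonymous timer struct. That header is introduced elsewhere in this series and is not shown here; the shape below is inferred from the fields lapic.c touches (lapic_timer.timer, .period, .pending, .t_ops, .kvm, .vcpu_id) and from the kvm_timer_fn/kvm_timer_ops references, so treat it as a sketch rather than the actual layout.

    struct kvm_timer {
            struct hrtimer timer;           /* was apic->timer.dev */
            s64 period;                     /* period in ns */
            atomic_t pending;               /* ticks accumulated but not yet injected */
            struct kvm_timer_ops *t_ops;    /* ->is_periodic() callback */
            struct kvm *kvm;
            int vcpu_id;
    };

    struct kvm_timer_ops {
            bool (*is_periodic)(struct kvm_timer *);
    };

    enum hrtimer_restart kvm_timer_fn(struct hrtimer *data);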
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 32cf11e5728a..5c3d6e81a7dc 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -126,6 +126,7 @@ module_param(oos_shadow, bool, 0644); | |||
| 126 | #define PFERR_PRESENT_MASK (1U << 0) | 126 | #define PFERR_PRESENT_MASK (1U << 0) |
| 127 | #define PFERR_WRITE_MASK (1U << 1) | 127 | #define PFERR_WRITE_MASK (1U << 1) |
| 128 | #define PFERR_USER_MASK (1U << 2) | 128 | #define PFERR_USER_MASK (1U << 2) |
| 129 | #define PFERR_RSVD_MASK (1U << 3) | ||
| 129 | #define PFERR_FETCH_MASK (1U << 4) | 130 | #define PFERR_FETCH_MASK (1U << 4) |
| 130 | 131 | ||
| 131 | #define PT_DIRECTORY_LEVEL 2 | 132 | #define PT_DIRECTORY_LEVEL 2 |
| @@ -177,7 +178,11 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ | |||
| 177 | static u64 __read_mostly shadow_user_mask; | 178 | static u64 __read_mostly shadow_user_mask; |
| 178 | static u64 __read_mostly shadow_accessed_mask; | 179 | static u64 __read_mostly shadow_accessed_mask; |
| 179 | static u64 __read_mostly shadow_dirty_mask; | 180 | static u64 __read_mostly shadow_dirty_mask; |
| 180 | static u64 __read_mostly shadow_mt_mask; | 181 | |
| 182 | static inline u64 rsvd_bits(int s, int e) | ||
| 183 | { | ||
| 184 | return ((1ULL << (e - s + 1)) - 1) << s; | ||
| 185 | } | ||
| 181 | 186 | ||
| 182 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) | 187 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) |
| 183 | { | 188 | { |
| @@ -193,14 +198,13 @@ void kvm_mmu_set_base_ptes(u64 base_pte) | |||
| 193 | EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); | 198 | EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); |
| 194 | 199 | ||
| 195 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 200 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
| 196 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask) | 201 | u64 dirty_mask, u64 nx_mask, u64 x_mask) |
| 197 | { | 202 | { |
| 198 | shadow_user_mask = user_mask; | 203 | shadow_user_mask = user_mask; |
| 199 | shadow_accessed_mask = accessed_mask; | 204 | shadow_accessed_mask = accessed_mask; |
| 200 | shadow_dirty_mask = dirty_mask; | 205 | shadow_dirty_mask = dirty_mask; |
| 201 | shadow_nx_mask = nx_mask; | 206 | shadow_nx_mask = nx_mask; |
| 202 | shadow_x_mask = x_mask; | 207 | shadow_x_mask = x_mask; |
| 203 | shadow_mt_mask = mt_mask; | ||
| 204 | } | 208 | } |
| 205 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | 209 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); |
| 206 | 210 | ||
| @@ -219,11 +223,6 @@ static int is_nx(struct kvm_vcpu *vcpu) | |||
| 219 | return vcpu->arch.shadow_efer & EFER_NX; | 223 | return vcpu->arch.shadow_efer & EFER_NX; |
| 220 | } | 224 | } |
| 221 | 225 | ||
| 222 | static int is_present_pte(unsigned long pte) | ||
| 223 | { | ||
| 224 | return pte & PT_PRESENT_MASK; | ||
| 225 | } | ||
| 226 | |||
| 227 | static int is_shadow_present_pte(u64 pte) | 226 | static int is_shadow_present_pte(u64 pte) |
| 228 | { | 227 | { |
| 229 | return pte != shadow_trap_nonpresent_pte | 228 | return pte != shadow_trap_nonpresent_pte |
| @@ -1074,18 +1073,10 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | |||
| 1074 | return NULL; | 1073 | return NULL; |
| 1075 | } | 1074 | } |
| 1076 | 1075 | ||
| 1077 | static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp) | ||
| 1078 | { | ||
| 1079 | list_del(&sp->oos_link); | ||
| 1080 | --kvm->stat.mmu_unsync_global; | ||
| 1081 | } | ||
| 1082 | |||
| 1083 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1076 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
| 1084 | { | 1077 | { |
| 1085 | WARN_ON(!sp->unsync); | 1078 | WARN_ON(!sp->unsync); |
| 1086 | sp->unsync = 0; | 1079 | sp->unsync = 0; |
| 1087 | if (sp->global) | ||
| 1088 | kvm_unlink_unsync_global(kvm, sp); | ||
| 1089 | --kvm->stat.mmu_unsync; | 1080 | --kvm->stat.mmu_unsync; |
| 1090 | } | 1081 | } |
| 1091 | 1082 | ||
| @@ -1248,7 +1239,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 1248 | pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word); | 1239 | pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word); |
| 1249 | sp->gfn = gfn; | 1240 | sp->gfn = gfn; |
| 1250 | sp->role = role; | 1241 | sp->role = role; |
| 1251 | sp->global = 0; | ||
| 1252 | hlist_add_head(&sp->hash_link, bucket); | 1242 | hlist_add_head(&sp->hash_link, bucket); |
| 1253 | if (!direct) { | 1243 | if (!direct) { |
| 1254 | if (rmap_write_protect(vcpu->kvm, gfn)) | 1244 | if (rmap_write_protect(vcpu->kvm, gfn)) |
| @@ -1616,7 +1606,7 @@ static int get_mtrr_type(struct mtrr_state_type *mtrr_state, | |||
| 1616 | return mtrr_state->def_type; | 1606 | return mtrr_state->def_type; |
| 1617 | } | 1607 | } |
| 1618 | 1608 | ||
| 1619 | static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) | 1609 | u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) |
| 1620 | { | 1610 | { |
| 1621 | u8 mtrr; | 1611 | u8 mtrr; |
| 1622 | 1612 | ||
| @@ -1626,6 +1616,7 @@ static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) | |||
| 1626 | mtrr = MTRR_TYPE_WRBACK; | 1616 | mtrr = MTRR_TYPE_WRBACK; |
| 1627 | return mtrr; | 1617 | return mtrr; |
| 1628 | } | 1618 | } |
| 1619 | EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type); | ||
| 1629 | 1620 | ||
| 1630 | static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1621 | static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
| 1631 | { | 1622 | { |
| @@ -1646,11 +1637,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
| 1646 | ++vcpu->kvm->stat.mmu_unsync; | 1637 | ++vcpu->kvm->stat.mmu_unsync; |
| 1647 | sp->unsync = 1; | 1638 | sp->unsync = 1; |
| 1648 | 1639 | ||
| 1649 | if (sp->global) { | 1640 | kvm_mmu_mark_parents_unsync(vcpu, sp); |
| 1650 | list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages); | ||
| 1651 | ++vcpu->kvm->stat.mmu_unsync_global; | ||
| 1652 | } else | ||
| 1653 | kvm_mmu_mark_parents_unsync(vcpu, sp); | ||
| 1654 | 1641 | ||
| 1655 | mmu_convert_notrap(sp); | 1642 | mmu_convert_notrap(sp); |
| 1656 | return 0; | 1643 | return 0; |
| @@ -1677,21 +1664,11 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
| 1677 | static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | 1664 | static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, |
| 1678 | unsigned pte_access, int user_fault, | 1665 | unsigned pte_access, int user_fault, |
| 1679 | int write_fault, int dirty, int largepage, | 1666 | int write_fault, int dirty, int largepage, |
| 1680 | int global, gfn_t gfn, pfn_t pfn, bool speculative, | 1667 | gfn_t gfn, pfn_t pfn, bool speculative, |
| 1681 | bool can_unsync) | 1668 | bool can_unsync) |
| 1682 | { | 1669 | { |
| 1683 | u64 spte; | 1670 | u64 spte; |
| 1684 | int ret = 0; | 1671 | int ret = 0; |
| 1685 | u64 mt_mask = shadow_mt_mask; | ||
| 1686 | struct kvm_mmu_page *sp = page_header(__pa(shadow_pte)); | ||
| 1687 | |||
| 1688 | if (!global && sp->global) { | ||
| 1689 | sp->global = 0; | ||
| 1690 | if (sp->unsync) { | ||
| 1691 | kvm_unlink_unsync_global(vcpu->kvm, sp); | ||
| 1692 | kvm_mmu_mark_parents_unsync(vcpu, sp); | ||
| 1693 | } | ||
| 1694 | } | ||
| 1695 | 1672 | ||
| 1696 | /* | 1673 | /* |
| 1697 | * We don't set the accessed bit, since we sometimes want to see | 1674 | * We don't set the accessed bit, since we sometimes want to see |
| @@ -1711,16 +1688,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
| 1711 | spte |= shadow_user_mask; | 1688 | spte |= shadow_user_mask; |
| 1712 | if (largepage) | 1689 | if (largepage) |
| 1713 | spte |= PT_PAGE_SIZE_MASK; | 1690 | spte |= PT_PAGE_SIZE_MASK; |
| 1714 | if (mt_mask) { | 1691 | if (tdp_enabled) |
| 1715 | if (!kvm_is_mmio_pfn(pfn)) { | 1692 | spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, |
| 1716 | mt_mask = get_memory_type(vcpu, gfn) << | 1693 | kvm_is_mmio_pfn(pfn)); |
| 1717 | kvm_x86_ops->get_mt_mask_shift(); | ||
| 1718 | mt_mask |= VMX_EPT_IGMT_BIT; | ||
| 1719 | } else | ||
| 1720 | mt_mask = MTRR_TYPE_UNCACHABLE << | ||
| 1721 | kvm_x86_ops->get_mt_mask_shift(); | ||
| 1722 | spte |= mt_mask; | ||
| 1723 | } | ||
| 1724 | 1694 | ||
| 1725 | spte |= (u64)pfn << PAGE_SHIFT; | 1695 | spte |= (u64)pfn << PAGE_SHIFT; |
| 1726 | 1696 | ||
| @@ -1765,8 +1735,8 @@ set_pte: | |||
| 1765 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | 1735 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, |
| 1766 | unsigned pt_access, unsigned pte_access, | 1736 | unsigned pt_access, unsigned pte_access, |
| 1767 | int user_fault, int write_fault, int dirty, | 1737 | int user_fault, int write_fault, int dirty, |
| 1768 | int *ptwrite, int largepage, int global, | 1738 | int *ptwrite, int largepage, gfn_t gfn, |
| 1769 | gfn_t gfn, pfn_t pfn, bool speculative) | 1739 | pfn_t pfn, bool speculative) |
| 1770 | { | 1740 | { |
| 1771 | int was_rmapped = 0; | 1741 | int was_rmapped = 0; |
| 1772 | int was_writeble = is_writeble_pte(*shadow_pte); | 1742 | int was_writeble = is_writeble_pte(*shadow_pte); |
| @@ -1795,7 +1765,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
| 1795 | was_rmapped = 1; | 1765 | was_rmapped = 1; |
| 1796 | } | 1766 | } |
| 1797 | if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, | 1767 | if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, |
| 1798 | dirty, largepage, global, gfn, pfn, speculative, true)) { | 1768 | dirty, largepage, gfn, pfn, speculative, true)) { |
| 1799 | if (write_fault) | 1769 | if (write_fault) |
| 1800 | *ptwrite = 1; | 1770 | *ptwrite = 1; |
| 1801 | kvm_x86_ops->tlb_flush(vcpu); | 1771 | kvm_x86_ops->tlb_flush(vcpu); |
| @@ -1843,7 +1813,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
| 1843 | || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) { | 1813 | || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) { |
| 1844 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, | 1814 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, |
| 1845 | 0, write, 1, &pt_write, | 1815 | 0, write, 1, &pt_write, |
| 1846 | largepage, 0, gfn, pfn, false); | 1816 | largepage, gfn, pfn, false); |
| 1847 | ++vcpu->stat.pf_fixed; | 1817 | ++vcpu->stat.pf_fixed; |
| 1848 | break; | 1818 | break; |
| 1849 | } | 1819 | } |
| @@ -1942,7 +1912,19 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
| 1942 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 1912 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
| 1943 | } | 1913 | } |
| 1944 | 1914 | ||
| 1945 | static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | 1915 | static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) |
| 1916 | { | ||
| 1917 | int ret = 0; | ||
| 1918 | |||
| 1919 | if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { | ||
| 1920 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | ||
| 1921 | ret = 1; | ||
| 1922 | } | ||
| 1923 | |||
| 1924 | return ret; | ||
| 1925 | } | ||
| 1926 | |||
| 1927 | static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | ||
| 1946 | { | 1928 | { |
| 1947 | int i; | 1929 | int i; |
| 1948 | gfn_t root_gfn; | 1930 | gfn_t root_gfn; |
| @@ -1957,13 +1939,15 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
| 1957 | ASSERT(!VALID_PAGE(root)); | 1939 | ASSERT(!VALID_PAGE(root)); |
| 1958 | if (tdp_enabled) | 1940 | if (tdp_enabled) |
| 1959 | direct = 1; | 1941 | direct = 1; |
| 1942 | if (mmu_check_root(vcpu, root_gfn)) | ||
| 1943 | return 1; | ||
| 1960 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, | 1944 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, |
| 1961 | PT64_ROOT_LEVEL, direct, | 1945 | PT64_ROOT_LEVEL, direct, |
| 1962 | ACC_ALL, NULL); | 1946 | ACC_ALL, NULL); |
| 1963 | root = __pa(sp->spt); | 1947 | root = __pa(sp->spt); |
| 1964 | ++sp->root_count; | 1948 | ++sp->root_count; |
| 1965 | vcpu->arch.mmu.root_hpa = root; | 1949 | vcpu->arch.mmu.root_hpa = root; |
| 1966 | return; | 1950 | return 0; |
| 1967 | } | 1951 | } |
| 1968 | direct = !is_paging(vcpu); | 1952 | direct = !is_paging(vcpu); |
| 1969 | if (tdp_enabled) | 1953 | if (tdp_enabled) |
| @@ -1980,6 +1964,8 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
| 1980 | root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT; | 1964 | root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT; |
| 1981 | } else if (vcpu->arch.mmu.root_level == 0) | 1965 | } else if (vcpu->arch.mmu.root_level == 0) |
| 1982 | root_gfn = 0; | 1966 | root_gfn = 0; |
| 1967 | if (mmu_check_root(vcpu, root_gfn)) | ||
| 1968 | return 1; | ||
| 1983 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 1969 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
| 1984 | PT32_ROOT_LEVEL, direct, | 1970 | PT32_ROOT_LEVEL, direct, |
| 1985 | ACC_ALL, NULL); | 1971 | ACC_ALL, NULL); |
| @@ -1988,6 +1974,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
| 1988 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; | 1974 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; |
| 1989 | } | 1975 | } |
| 1990 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); | 1976 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); |
| 1977 | return 0; | ||
| 1991 | } | 1978 | } |
| 1992 | 1979 | ||
| 1993 | static void mmu_sync_roots(struct kvm_vcpu *vcpu) | 1980 | static void mmu_sync_roots(struct kvm_vcpu *vcpu) |
| @@ -2006,7 +1993,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
| 2006 | for (i = 0; i < 4; ++i) { | 1993 | for (i = 0; i < 4; ++i) { |
| 2007 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | 1994 | hpa_t root = vcpu->arch.mmu.pae_root[i]; |
| 2008 | 1995 | ||
| 2009 | if (root) { | 1996 | if (root && VALID_PAGE(root)) { |
| 2010 | root &= PT64_BASE_ADDR_MASK; | 1997 | root &= PT64_BASE_ADDR_MASK; |
| 2011 | sp = page_header(root); | 1998 | sp = page_header(root); |
| 2012 | mmu_sync_children(vcpu, sp); | 1999 | mmu_sync_children(vcpu, sp); |
| @@ -2014,15 +2001,6 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
| 2014 | } | 2001 | } |
| 2015 | } | 2002 | } |
| 2016 | 2003 | ||
| 2017 | static void mmu_sync_global(struct kvm_vcpu *vcpu) | ||
| 2018 | { | ||
| 2019 | struct kvm *kvm = vcpu->kvm; | ||
| 2020 | struct kvm_mmu_page *sp, *n; | ||
| 2021 | |||
| 2022 | list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link) | ||
| 2023 | kvm_sync_page(vcpu, sp); | ||
| 2024 | } | ||
| 2025 | |||
| 2026 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | 2004 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) |
| 2027 | { | 2005 | { |
| 2028 | spin_lock(&vcpu->kvm->mmu_lock); | 2006 | spin_lock(&vcpu->kvm->mmu_lock); |
| @@ -2030,13 +2008,6 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
| 2030 | spin_unlock(&vcpu->kvm->mmu_lock); | 2008 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 2031 | } | 2009 | } |
| 2032 | 2010 | ||
| 2033 | void kvm_mmu_sync_global(struct kvm_vcpu *vcpu) | ||
| 2034 | { | ||
| 2035 | spin_lock(&vcpu->kvm->mmu_lock); | ||
| 2036 | mmu_sync_global(vcpu); | ||
| 2037 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 2038 | } | ||
| 2039 | |||
| 2040 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) | 2011 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) |
| 2041 | { | 2012 | { |
| 2042 | return vaddr; | 2013 | return vaddr; |
| @@ -2151,6 +2122,14 @@ static void paging_free(struct kvm_vcpu *vcpu) | |||
| 2151 | nonpaging_free(vcpu); | 2122 | nonpaging_free(vcpu); |
| 2152 | } | 2123 | } |
| 2153 | 2124 | ||
| 2125 | static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level) | ||
| 2126 | { | ||
| 2127 | int bit7; | ||
| 2128 | |||
| 2129 | bit7 = (gpte >> 7) & 1; | ||
| 2130 | return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0; | ||
| 2131 | } | ||
| 2132 | |||
| 2154 | #define PTTYPE 64 | 2133 | #define PTTYPE 64 |
| 2155 | #include "paging_tmpl.h" | 2134 | #include "paging_tmpl.h" |
| 2156 | #undef PTTYPE | 2135 | #undef PTTYPE |
| @@ -2159,6 +2138,59 @@ static void paging_free(struct kvm_vcpu *vcpu) | |||
| 2159 | #include "paging_tmpl.h" | 2138 | #include "paging_tmpl.h" |
| 2160 | #undef PTTYPE | 2139 | #undef PTTYPE |
| 2161 | 2140 | ||
| 2141 | static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | ||
| 2142 | { | ||
| 2143 | struct kvm_mmu *context = &vcpu->arch.mmu; | ||
| 2144 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | ||
| 2145 | u64 exb_bit_rsvd = 0; | ||
| 2146 | |||
| 2147 | if (!is_nx(vcpu)) | ||
| 2148 | exb_bit_rsvd = rsvd_bits(63, 63); | ||
| 2149 | switch (level) { | ||
| 2150 | case PT32_ROOT_LEVEL: | ||
| 2151 | /* no rsvd bits for 2 level 4K page table entries */ | ||
| 2152 | context->rsvd_bits_mask[0][1] = 0; | ||
| 2153 | context->rsvd_bits_mask[0][0] = 0; | ||
| 2154 | if (is_cpuid_PSE36()) | ||
| 2155 | /* 36bits PSE 4MB page */ | ||
| 2156 | context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); | ||
| 2157 | else | ||
| 2158 | /* 32 bits PSE 4MB page */ | ||
| 2159 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); | ||
| 2160 | context->rsvd_bits_mask[1][0] = ~0ull; | ||
| 2161 | break; | ||
| 2162 | case PT32E_ROOT_LEVEL: | ||
| 2163 | context->rsvd_bits_mask[0][2] = | ||
| 2164 | rsvd_bits(maxphyaddr, 63) | | ||
| 2165 | rsvd_bits(7, 8) | rsvd_bits(1, 2); /* PDPTE */ | ||
| 2166 | context->rsvd_bits_mask[0][1] = exb_bit_rsvd | | ||
| 2167 | rsvd_bits(maxphyaddr, 62); /* PDE */ | ||
| 2168 | context->rsvd_bits_mask[0][0] = exb_bit_rsvd | | ||
| 2169 | rsvd_bits(maxphyaddr, 62); /* PTE */ | ||
| 2170 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | ||
| 2171 | rsvd_bits(maxphyaddr, 62) | | ||
| 2172 | rsvd_bits(13, 20); /* large page */ | ||
| 2173 | context->rsvd_bits_mask[1][0] = ~0ull; | ||
| 2174 | break; | ||
| 2175 | case PT64_ROOT_LEVEL: | ||
| 2176 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | | ||
| 2177 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8); | ||
| 2178 | context->rsvd_bits_mask[0][2] = exb_bit_rsvd | | ||
| 2179 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8); | ||
| 2180 | context->rsvd_bits_mask[0][1] = exb_bit_rsvd | | ||
| 2181 | rsvd_bits(maxphyaddr, 51); | ||
| 2182 | context->rsvd_bits_mask[0][0] = exb_bit_rsvd | | ||
| 2183 | rsvd_bits(maxphyaddr, 51); | ||
| 2184 | context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3]; | ||
| 2185 | context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2]; | ||
| 2186 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | ||
| 2187 | rsvd_bits(maxphyaddr, 51) | | ||
| 2188 | rsvd_bits(13, 20); /* large page */ | ||
| 2189 | context->rsvd_bits_mask[1][0] = ~0ull; | ||
| 2190 | break; | ||
| 2191 | } | ||
| 2192 | } | ||
| 2193 | |||
| 2162 | static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) | 2194 | static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) |
| 2163 | { | 2195 | { |
| 2164 | struct kvm_mmu *context = &vcpu->arch.mmu; | 2196 | struct kvm_mmu *context = &vcpu->arch.mmu; |
| @@ -2179,6 +2211,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) | |||
| 2179 | 2211 | ||
| 2180 | static int paging64_init_context(struct kvm_vcpu *vcpu) | 2212 | static int paging64_init_context(struct kvm_vcpu *vcpu) |
| 2181 | { | 2213 | { |
| 2214 | reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL); | ||
| 2182 | return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL); | 2215 | return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL); |
| 2183 | } | 2216 | } |
| 2184 | 2217 | ||
| @@ -2186,6 +2219,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu) | |||
| 2186 | { | 2219 | { |
| 2187 | struct kvm_mmu *context = &vcpu->arch.mmu; | 2220 | struct kvm_mmu *context = &vcpu->arch.mmu; |
| 2188 | 2221 | ||
| 2222 | reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL); | ||
| 2189 | context->new_cr3 = paging_new_cr3; | 2223 | context->new_cr3 = paging_new_cr3; |
| 2190 | context->page_fault = paging32_page_fault; | 2224 | context->page_fault = paging32_page_fault; |
| 2191 | context->gva_to_gpa = paging32_gva_to_gpa; | 2225 | context->gva_to_gpa = paging32_gva_to_gpa; |
| @@ -2201,6 +2235,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu) | |||
| 2201 | 2235 | ||
| 2202 | static int paging32E_init_context(struct kvm_vcpu *vcpu) | 2236 | static int paging32E_init_context(struct kvm_vcpu *vcpu) |
| 2203 | { | 2237 | { |
| 2238 | reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL); | ||
| 2204 | return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); | 2239 | return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); |
| 2205 | } | 2240 | } |
| 2206 | 2241 | ||
| @@ -2221,12 +2256,15 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
| 2221 | context->gva_to_gpa = nonpaging_gva_to_gpa; | 2256 | context->gva_to_gpa = nonpaging_gva_to_gpa; |
| 2222 | context->root_level = 0; | 2257 | context->root_level = 0; |
| 2223 | } else if (is_long_mode(vcpu)) { | 2258 | } else if (is_long_mode(vcpu)) { |
| 2259 | reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL); | ||
| 2224 | context->gva_to_gpa = paging64_gva_to_gpa; | 2260 | context->gva_to_gpa = paging64_gva_to_gpa; |
| 2225 | context->root_level = PT64_ROOT_LEVEL; | 2261 | context->root_level = PT64_ROOT_LEVEL; |
| 2226 | } else if (is_pae(vcpu)) { | 2262 | } else if (is_pae(vcpu)) { |
| 2263 | reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL); | ||
| 2227 | context->gva_to_gpa = paging64_gva_to_gpa; | 2264 | context->gva_to_gpa = paging64_gva_to_gpa; |
| 2228 | context->root_level = PT32E_ROOT_LEVEL; | 2265 | context->root_level = PT32E_ROOT_LEVEL; |
| 2229 | } else { | 2266 | } else { |
| 2267 | reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL); | ||
| 2230 | context->gva_to_gpa = paging32_gva_to_gpa; | 2268 | context->gva_to_gpa = paging32_gva_to_gpa; |
| 2231 | context->root_level = PT32_ROOT_LEVEL; | 2269 | context->root_level = PT32_ROOT_LEVEL; |
| 2232 | } | 2270 | } |
| @@ -2290,9 +2328,11 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) | |||
| 2290 | goto out; | 2328 | goto out; |
| 2291 | spin_lock(&vcpu->kvm->mmu_lock); | 2329 | spin_lock(&vcpu->kvm->mmu_lock); |
| 2292 | kvm_mmu_free_some_pages(vcpu); | 2330 | kvm_mmu_free_some_pages(vcpu); |
| 2293 | mmu_alloc_roots(vcpu); | 2331 | r = mmu_alloc_roots(vcpu); |
| 2294 | mmu_sync_roots(vcpu); | 2332 | mmu_sync_roots(vcpu); |
| 2295 | spin_unlock(&vcpu->kvm->mmu_lock); | 2333 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 2334 | if (r) | ||
| 2335 | goto out; | ||
| 2296 | kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); | 2336 | kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); |
| 2297 | kvm_mmu_flush_tlb(vcpu); | 2337 | kvm_mmu_flush_tlb(vcpu); |
| 2298 | out: | 2338 | out: |
| @@ -2638,14 +2678,6 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp); | |||
| 2638 | 2678 | ||
| 2639 | static void free_mmu_pages(struct kvm_vcpu *vcpu) | 2679 | static void free_mmu_pages(struct kvm_vcpu *vcpu) |
| 2640 | { | 2680 | { |
| 2641 | struct kvm_mmu_page *sp; | ||
| 2642 | |||
| 2643 | while (!list_empty(&vcpu->kvm->arch.active_mmu_pages)) { | ||
| 2644 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.next, | ||
| 2645 | struct kvm_mmu_page, link); | ||
| 2646 | kvm_mmu_zap_page(vcpu->kvm, sp); | ||
| 2647 | cond_resched(); | ||
| 2648 | } | ||
| 2649 | free_page((unsigned long)vcpu->arch.mmu.pae_root); | 2681 | free_page((unsigned long)vcpu->arch.mmu.pae_root); |
| 2650 | } | 2682 | } |
| 2651 | 2683 | ||
| @@ -2710,7 +2742,6 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
| 2710 | { | 2742 | { |
| 2711 | struct kvm_mmu_page *sp; | 2743 | struct kvm_mmu_page *sp; |
| 2712 | 2744 | ||
| 2713 | spin_lock(&kvm->mmu_lock); | ||
| 2714 | list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { | 2745 | list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { |
| 2715 | int i; | 2746 | int i; |
| 2716 | u64 *pt; | 2747 | u64 *pt; |
| @@ -2725,7 +2756,6 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
| 2725 | pt[i] &= ~PT_WRITABLE_MASK; | 2756 | pt[i] &= ~PT_WRITABLE_MASK; |
| 2726 | } | 2757 | } |
| 2727 | kvm_flush_remote_tlbs(kvm); | 2758 | kvm_flush_remote_tlbs(kvm); |
| 2728 | spin_unlock(&kvm->mmu_lock); | ||
| 2729 | } | 2759 | } |
| 2730 | 2760 | ||
| 2731 | void kvm_mmu_zap_all(struct kvm *kvm) | 2761 | void kvm_mmu_zap_all(struct kvm *kvm) |
| @@ -3007,11 +3037,13 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, | |||
| 3007 | " in nonleaf level: levels %d gva %lx" | 3037 | " in nonleaf level: levels %d gva %lx" |
| 3008 | " level %d pte %llx\n", audit_msg, | 3038 | " level %d pte %llx\n", audit_msg, |
| 3009 | vcpu->arch.mmu.root_level, va, level, ent); | 3039 | vcpu->arch.mmu.root_level, va, level, ent); |
| 3010 | 3040 | else | |
| 3011 | audit_mappings_page(vcpu, ent, va, level - 1); | 3041 | audit_mappings_page(vcpu, ent, va, level - 1); |
| 3012 | } else { | 3042 | } else { |
| 3013 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); | 3043 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); |
| 3014 | hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT; | 3044 | gfn_t gfn = gpa >> PAGE_SHIFT; |
| 3045 | pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn); | ||
| 3046 | hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT; | ||
| 3015 | 3047 | ||
| 3016 | if (is_shadow_present_pte(ent) | 3048 | if (is_shadow_present_pte(ent) |
| 3017 | && (ent & PT64_BASE_ADDR_MASK) != hpa) | 3049 | && (ent & PT64_BASE_ADDR_MASK) != hpa) |
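The new reset_rsvds_bits_mask() builds its per-level masks from the rsvd_bits() helper added at the top of mmu.c, rsvd_bits(s, e) = ((1ULL << (e - s + 1)) - 1) << s, i.e. a mask with bits s..e set. A few worked values (illustrative only) make the table above easier to read:

    /*
     * rsvd_bits(13, 21) = 0x00000000003fe000   PSE-36 4MB PDE, bits 13..21
     * rsvd_bits(13, 20) = 0x00000000001fe000   PAE/long-mode 2MB large page
     * rsvd_bits(63, 63) = 0x8000000000000000   NX bit reserved when EFER.NX is clear
     * rsvd_bits(40, 51) = 0x000fff0000000000   e.g. a guest with maxphyaddr == 40
     */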
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index eaab2145f62b..3494a2fb136e 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
| @@ -75,4 +75,9 @@ static inline int is_paging(struct kvm_vcpu *vcpu) | |||
| 75 | return vcpu->arch.cr0 & X86_CR0_PG; | 75 | return vcpu->arch.cr0 & X86_CR0_PG; |
| 76 | } | 76 | } |
| 77 | 77 | ||
| 78 | static inline int is_present_pte(unsigned long pte) | ||
| 79 | { | ||
| 80 | return pte & PT_PRESENT_MASK; | ||
| 81 | } | ||
| 82 | |||
| 78 | #endif | 83 | #endif |
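Moving is_present_pte() from mmu.c into mmu.h exposes the guest-PTE presence check to callers outside mmu.c. A plausible (hypothetical, since the corresponding hunk is not shown in this excerpt) use is PDPTR validation against the new reserved-bit masks:

    /* Hypothetical caller: reject a present PDPTE that has reserved bits set. */
    if (is_present_pte(pdpte) &&
        (pdpte & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
            ret = 0;
            goto out;
    }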
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6bd70206c561..258e4591e1ca 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
| @@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, | |||
| 123 | gfn_t table_gfn; | 123 | gfn_t table_gfn; |
| 124 | unsigned index, pt_access, pte_access; | 124 | unsigned index, pt_access, pte_access; |
| 125 | gpa_t pte_gpa; | 125 | gpa_t pte_gpa; |
| 126 | int rsvd_fault = 0; | ||
| 126 | 127 | ||
| 127 | pgprintk("%s: addr %lx\n", __func__, addr); | 128 | pgprintk("%s: addr %lx\n", __func__, addr); |
| 128 | walk: | 129 | walk: |
| @@ -157,6 +158,10 @@ walk: | |||
| 157 | if (!is_present_pte(pte)) | 158 | if (!is_present_pte(pte)) |
| 158 | goto not_present; | 159 | goto not_present; |
| 159 | 160 | ||
| 161 | rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level); | ||
| 162 | if (rsvd_fault) | ||
| 163 | goto access_error; | ||
| 164 | |||
| 160 | if (write_fault && !is_writeble_pte(pte)) | 165 | if (write_fault && !is_writeble_pte(pte)) |
| 161 | if (user_fault || is_write_protection(vcpu)) | 166 | if (user_fault || is_write_protection(vcpu)) |
| 162 | goto access_error; | 167 | goto access_error; |
| @@ -209,7 +214,6 @@ walk: | |||
| 209 | if (ret) | 214 | if (ret) |
| 210 | goto walk; | 215 | goto walk; |
| 211 | pte |= PT_DIRTY_MASK; | 216 | pte |= PT_DIRTY_MASK; |
| 212 | kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0); | ||
| 213 | walker->ptes[walker->level - 1] = pte; | 217 | walker->ptes[walker->level - 1] = pte; |
| 214 | } | 218 | } |
| 215 | 219 | ||
| @@ -233,6 +237,8 @@ err: | |||
| 233 | walker->error_code |= PFERR_USER_MASK; | 237 | walker->error_code |= PFERR_USER_MASK; |
| 234 | if (fetch_fault) | 238 | if (fetch_fault) |
| 235 | walker->error_code |= PFERR_FETCH_MASK; | 239 | walker->error_code |= PFERR_FETCH_MASK; |
| 240 | if (rsvd_fault) | ||
| 241 | walker->error_code |= PFERR_RSVD_MASK; | ||
| 236 | return 0; | 242 | return 0; |
| 237 | } | 243 | } |
| 238 | 244 | ||
| @@ -262,8 +268,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
| 262 | kvm_get_pfn(pfn); | 268 | kvm_get_pfn(pfn); |
| 263 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, | 269 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, |
| 264 | gpte & PT_DIRTY_MASK, NULL, largepage, | 270 | gpte & PT_DIRTY_MASK, NULL, largepage, |
| 265 | gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte), | 271 | gpte_to_gfn(gpte), pfn, true); |
| 266 | pfn, true); | ||
| 267 | } | 272 | } |
| 268 | 273 | ||
| 269 | /* | 274 | /* |
| @@ -297,7 +302,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 297 | user_fault, write_fault, | 302 | user_fault, write_fault, |
| 298 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, | 303 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, |
| 299 | ptwrite, largepage, | 304 | ptwrite, largepage, |
| 300 | gw->ptes[gw->level-1] & PT_GLOBAL_MASK, | ||
| 301 | gw->gfn, pfn, false); | 305 | gw->gfn, pfn, false); |
| 302 | break; | 306 | break; |
| 303 | } | 307 | } |
| @@ -380,7 +384,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 380 | return r; | 384 | return r; |
| 381 | 385 | ||
| 382 | /* | 386 | /* |
| 383 | * Look up the shadow pte for the faulting address. | 387 | * Look up the guest pte for the faulting address. |
| 384 | */ | 388 | */ |
| 385 | r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault, | 389 | r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault, |
| 386 | fetch_fault); | 390 | fetch_fault); |
| @@ -586,7 +590,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
| 586 | nr_present++; | 590 | nr_present++; |
| 587 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 591 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
| 588 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, | 592 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, |
| 589 | is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn, | 593 | is_dirty_pte(gpte), 0, gfn, |
| 590 | spte_to_pfn(sp->spt[i]), true, false); | 594 | spte_to_pfn(sp->spt[i]), true, false); |
| 591 | } | 595 | } |
| 592 | 596 | ||
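With the walker change above, a guest pte that has a reserved bit set now aborts the walk and tags the reported error code with PFERR_RSVD_MASK. Using the PFERR_* bit values from the mmu.c hunk earlier (present=1, write=2, user=4, rsvd=8, fetch=16), an illustrative decode:

    /*
     * Example error codes the walker can now produce (values only illustrative):
     *   0x0d = PFERR_PRESENT_MASK | PFERR_USER_MASK | PFERR_RSVD_MASK
     *          (user-mode read hitting a reserved bit; reserved-bit faults
     *           always report the present bit on x86)
     *   0x0b = PFERR_PRESENT_MASK | PFERR_WRITE_MASK | PFERR_RSVD_MASK
     *          (kernel-mode write hitting a reserved bit)
     */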
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1f8510c51d6e..71510e07e69e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include "irq.h" | 19 | #include "irq.h" |
| 20 | #include "mmu.h" | 20 | #include "mmu.h" |
| 21 | #include "kvm_cache_regs.h" | 21 | #include "kvm_cache_regs.h" |
| 22 | #include "x86.h" | ||
| 22 | 23 | ||
| 23 | #include <linux/module.h> | 24 | #include <linux/module.h> |
| 24 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
| @@ -69,7 +70,6 @@ module_param(npt, int, S_IRUGO); | |||
| 69 | static int nested = 0; | 70 | static int nested = 0; |
| 70 | module_param(nested, int, S_IRUGO); | 71 | module_param(nested, int, S_IRUGO); |
| 71 | 72 | ||
| 72 | static void kvm_reput_irq(struct vcpu_svm *svm); | ||
| 73 | static void svm_flush_tlb(struct kvm_vcpu *vcpu); | 73 | static void svm_flush_tlb(struct kvm_vcpu *vcpu); |
| 74 | 74 | ||
| 75 | static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override); | 75 | static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override); |
| @@ -132,24 +132,6 @@ static inline u32 svm_has(u32 feat) | |||
| 132 | return svm_features & feat; | 132 | return svm_features & feat; |
| 133 | } | 133 | } |
| 134 | 134 | ||
| 135 | static inline u8 pop_irq(struct kvm_vcpu *vcpu) | ||
| 136 | { | ||
| 137 | int word_index = __ffs(vcpu->arch.irq_summary); | ||
| 138 | int bit_index = __ffs(vcpu->arch.irq_pending[word_index]); | ||
| 139 | int irq = word_index * BITS_PER_LONG + bit_index; | ||
| 140 | |||
| 141 | clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]); | ||
| 142 | if (!vcpu->arch.irq_pending[word_index]) | ||
| 143 | clear_bit(word_index, &vcpu->arch.irq_summary); | ||
| 144 | return irq; | ||
| 145 | } | ||
| 146 | |||
| 147 | static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq) | ||
| 148 | { | ||
| 149 | set_bit(irq, vcpu->arch.irq_pending); | ||
| 150 | set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); | ||
| 151 | } | ||
| 152 | |||
| 153 | static inline void clgi(void) | 135 | static inline void clgi(void) |
| 154 | { | 136 | { |
| 155 | asm volatile (__ex(SVM_CLGI)); | 137 | asm volatile (__ex(SVM_CLGI)); |
| @@ -214,17 +196,31 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
| 214 | svm->vmcb->control.event_inj_err = error_code; | 196 | svm->vmcb->control.event_inj_err = error_code; |
| 215 | } | 197 | } |
| 216 | 198 | ||
| 217 | static bool svm_exception_injected(struct kvm_vcpu *vcpu) | 199 | static int is_external_interrupt(u32 info) |
| 200 | { | ||
| 201 | info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; | ||
| 202 | return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); | ||
| 203 | } | ||
| 204 | |||
| 205 | static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | ||
| 218 | { | 206 | { |
| 219 | struct vcpu_svm *svm = to_svm(vcpu); | 207 | struct vcpu_svm *svm = to_svm(vcpu); |
| 208 | u32 ret = 0; | ||
| 220 | 209 | ||
| 221 | return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID); | 210 | if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) |
| 211 | ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS; | ||
| 212 | return ret & mask; | ||
| 222 | } | 213 | } |
| 223 | 214 | ||
| 224 | static int is_external_interrupt(u32 info) | 215 | static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) |
| 225 | { | 216 | { |
| 226 | info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; | 217 | struct vcpu_svm *svm = to_svm(vcpu); |
| 227 | return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); | 218 | |
| 219 | if (mask == 0) | ||
| 220 | svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; | ||
| 221 | else | ||
| 222 | svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK; | ||
| 223 | |||
| 228 | } | 224 | } |
| 229 | 225 | ||
| 230 | static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | 226 | static void skip_emulated_instruction(struct kvm_vcpu *vcpu) |
| @@ -232,7 +228,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
| 232 | struct vcpu_svm *svm = to_svm(vcpu); | 228 | struct vcpu_svm *svm = to_svm(vcpu); |
| 233 | 229 | ||
| 234 | if (!svm->next_rip) { | 230 | if (!svm->next_rip) { |
| 235 | printk(KERN_DEBUG "%s: NOP\n", __func__); | 231 | if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) != |
| 232 | EMULATE_DONE) | ||
| 233 | printk(KERN_DEBUG "%s: NOP\n", __func__); | ||
| 236 | return; | 234 | return; |
| 237 | } | 235 | } |
| 238 | if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) | 236 | if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) |
| @@ -240,9 +238,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
| 240 | __func__, kvm_rip_read(vcpu), svm->next_rip); | 238 | __func__, kvm_rip_read(vcpu), svm->next_rip); |
| 241 | 239 | ||
| 242 | kvm_rip_write(vcpu, svm->next_rip); | 240 | kvm_rip_write(vcpu, svm->next_rip); |
| 243 | svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; | 241 | svm_set_interrupt_shadow(vcpu, 0); |
| 244 | |||
| 245 | vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK); | ||
| 246 | } | 242 | } |
| 247 | 243 | ||
| 248 | static int has_svm(void) | 244 | static int has_svm(void) |
| @@ -830,6 +826,15 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
| 830 | if (!var->unusable) | 826 | if (!var->unusable) |
| 831 | var->type |= 0x1; | 827 | var->type |= 0x1; |
| 832 | break; | 828 | break; |
| 829 | case VCPU_SREG_SS: | ||
| 830 | /* On AMD CPUs sometimes the DB bit in the segment | ||
| 831 | * descriptor is left as 1, although the whole segment has | ||
| 832 | * been made unusable. Clear it here to pass an Intel VMX | ||
| 833 | * entry check when cross vendor migrating. | ||
| 834 | */ | ||
| 835 | if (var->unusable) | ||
| 836 | var->db = 0; | ||
| 837 | break; | ||
| 833 | } | 838 | } |
| 834 | } | 839 | } |
| 835 | 840 | ||
| @@ -960,15 +965,16 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, | |||
| 960 | 965 | ||
| 961 | } | 966 | } |
| 962 | 967 | ||
| 963 | static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | 968 | static void update_db_intercept(struct kvm_vcpu *vcpu) |
| 964 | { | 969 | { |
| 965 | int old_debug = vcpu->guest_debug; | ||
| 966 | struct vcpu_svm *svm = to_svm(vcpu); | 970 | struct vcpu_svm *svm = to_svm(vcpu); |
| 967 | 971 | ||
| 968 | vcpu->guest_debug = dbg->control; | ||
| 969 | |||
| 970 | svm->vmcb->control.intercept_exceptions &= | 972 | svm->vmcb->control.intercept_exceptions &= |
| 971 | ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); | 973 | ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); |
| 974 | |||
| 975 | if (vcpu->arch.singlestep) | ||
| 976 | svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR); | ||
| 977 | |||
| 972 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { | 978 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { |
| 973 | if (vcpu->guest_debug & | 979 | if (vcpu->guest_debug & |
| 974 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) | 980 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) |
| @@ -979,6 +985,16 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | |||
| 979 | 1 << BP_VECTOR; | 985 | 1 << BP_VECTOR; |
| 980 | } else | 986 | } else |
| 981 | vcpu->guest_debug = 0; | 987 | vcpu->guest_debug = 0; |
| 988 | } | ||
| 989 | |||
| 990 | static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | ||
| 991 | { | ||
| 992 | int old_debug = vcpu->guest_debug; | ||
| 993 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 994 | |||
| 995 | vcpu->guest_debug = dbg->control; | ||
| 996 | |||
| 997 | update_db_intercept(vcpu); | ||
| 982 | 998 | ||
| 983 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | 999 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
| 984 | svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; | 1000 | svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; |
| @@ -993,16 +1009,6 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | |||
| 993 | return 0; | 1009 | return 0; |
| 994 | } | 1010 | } |
| 995 | 1011 | ||
| 996 | static int svm_get_irq(struct kvm_vcpu *vcpu) | ||
| 997 | { | ||
| 998 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 999 | u32 exit_int_info = svm->vmcb->control.exit_int_info; | ||
| 1000 | |||
| 1001 | if (is_external_interrupt(exit_int_info)) | ||
| 1002 | return exit_int_info & SVM_EVTINJ_VEC_MASK; | ||
| 1003 | return -1; | ||
| 1004 | } | ||
| 1005 | |||
| 1006 | static void load_host_msrs(struct kvm_vcpu *vcpu) | 1012 | static void load_host_msrs(struct kvm_vcpu *vcpu) |
| 1007 | { | 1013 | { |
| 1008 | #ifdef CONFIG_X86_64 | 1014 | #ifdef CONFIG_X86_64 |
| @@ -1107,17 +1113,8 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, | |||
| 1107 | 1113 | ||
| 1108 | static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1114 | static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
| 1109 | { | 1115 | { |
| 1110 | u32 exit_int_info = svm->vmcb->control.exit_int_info; | ||
| 1111 | struct kvm *kvm = svm->vcpu.kvm; | ||
| 1112 | u64 fault_address; | 1116 | u64 fault_address; |
| 1113 | u32 error_code; | 1117 | u32 error_code; |
| 1114 | bool event_injection = false; | ||
| 1115 | |||
| 1116 | if (!irqchip_in_kernel(kvm) && | ||
| 1117 | is_external_interrupt(exit_int_info)) { | ||
| 1118 | event_injection = true; | ||
| 1119 | push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); | ||
| 1120 | } | ||
| 1121 | 1118 | ||
| 1122 | fault_address = svm->vmcb->control.exit_info_2; | 1119 | fault_address = svm->vmcb->control.exit_info_2; |
| 1123 | error_code = svm->vmcb->control.exit_info_1; | 1120 | error_code = svm->vmcb->control.exit_info_1; |
| @@ -1137,23 +1134,40 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1137 | */ | 1134 | */ |
| 1138 | if (npt_enabled) | 1135 | if (npt_enabled) |
| 1139 | svm_flush_tlb(&svm->vcpu); | 1136 | svm_flush_tlb(&svm->vcpu); |
| 1140 | 1137 | else { | |
| 1141 | if (!npt_enabled && event_injection) | 1138 | if (kvm_event_needs_reinjection(&svm->vcpu)) |
| 1142 | kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); | 1139 | kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); |
| 1140 | } | ||
| 1143 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); | 1141 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); |
| 1144 | } | 1142 | } |
| 1145 | 1143 | ||
| 1146 | static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1144 | static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
| 1147 | { | 1145 | { |
| 1148 | if (!(svm->vcpu.guest_debug & | 1146 | if (!(svm->vcpu.guest_debug & |
| 1149 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { | 1147 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && |
| 1148 | !svm->vcpu.arch.singlestep) { | ||
| 1150 | kvm_queue_exception(&svm->vcpu, DB_VECTOR); | 1149 | kvm_queue_exception(&svm->vcpu, DB_VECTOR); |
| 1151 | return 1; | 1150 | return 1; |
| 1152 | } | 1151 | } |
| 1153 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 1152 | |
| 1154 | kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; | 1153 | if (svm->vcpu.arch.singlestep) { |
| 1155 | kvm_run->debug.arch.exception = DB_VECTOR; | 1154 | svm->vcpu.arch.singlestep = false; |
| 1156 | return 0; | 1155 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) |
| 1156 | svm->vmcb->save.rflags &= | ||
| 1157 | ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
| 1158 | update_db_intercept(&svm->vcpu); | ||
| 1159 | } | ||
| 1160 | |||
| 1161 | if (svm->vcpu.guest_debug & | ||
| 1162 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) { | ||
| 1163 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | ||
| 1164 | kvm_run->debug.arch.pc = | ||
| 1165 | svm->vmcb->save.cs.base + svm->vmcb->save.rip; | ||
| 1166 | kvm_run->debug.arch.exception = DB_VECTOR; | ||
| 1167 | return 0; | ||
| 1168 | } | ||
| 1169 | |||
| 1170 | return 1; | ||
| 1157 | } | 1171 | } |
| 1158 | 1172 | ||
| 1159 | static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1173 | static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
| @@ -1842,17 +1856,51 @@ static int task_switch_interception(struct vcpu_svm *svm, | |||
| 1842 | struct kvm_run *kvm_run) | 1856 | struct kvm_run *kvm_run) |
| 1843 | { | 1857 | { |
| 1844 | u16 tss_selector; | 1858 | u16 tss_selector; |
| 1859 | int reason; | ||
| 1860 | int int_type = svm->vmcb->control.exit_int_info & | ||
| 1861 | SVM_EXITINTINFO_TYPE_MASK; | ||
| 1862 | int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK; | ||
| 1863 | uint32_t type = | ||
| 1864 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; | ||
| 1865 | uint32_t idt_v = | ||
| 1866 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; | ||
| 1845 | 1867 | ||
| 1846 | tss_selector = (u16)svm->vmcb->control.exit_info_1; | 1868 | tss_selector = (u16)svm->vmcb->control.exit_info_1; |
| 1869 | |||
| 1847 | if (svm->vmcb->control.exit_info_2 & | 1870 | if (svm->vmcb->control.exit_info_2 & |
| 1848 | (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET)) | 1871 | (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET)) |
| 1849 | return kvm_task_switch(&svm->vcpu, tss_selector, | 1872 | reason = TASK_SWITCH_IRET; |
| 1850 | TASK_SWITCH_IRET); | 1873 | else if (svm->vmcb->control.exit_info_2 & |
| 1851 | if (svm->vmcb->control.exit_info_2 & | 1874 | (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) |
| 1852 | (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) | 1875 | reason = TASK_SWITCH_JMP; |
| 1853 | return kvm_task_switch(&svm->vcpu, tss_selector, | 1876 | else if (idt_v) |
| 1854 | TASK_SWITCH_JMP); | 1877 | reason = TASK_SWITCH_GATE; |
| 1855 | return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL); | 1878 | else |
| 1879 | reason = TASK_SWITCH_CALL; | ||
| 1880 | |||
| 1881 | if (reason == TASK_SWITCH_GATE) { | ||
| 1882 | switch (type) { | ||
| 1883 | case SVM_EXITINTINFO_TYPE_NMI: | ||
| 1884 | svm->vcpu.arch.nmi_injected = false; | ||
| 1885 | break; | ||
| 1886 | case SVM_EXITINTINFO_TYPE_EXEPT: | ||
| 1887 | kvm_clear_exception_queue(&svm->vcpu); | ||
| 1888 | break; | ||
| 1889 | case SVM_EXITINTINFO_TYPE_INTR: | ||
| 1890 | kvm_clear_interrupt_queue(&svm->vcpu); | ||
| 1891 | break; | ||
| 1892 | default: | ||
| 1893 | break; | ||
| 1894 | } | ||
| 1895 | } | ||
| 1896 | |||
| 1897 | if (reason != TASK_SWITCH_GATE || | ||
| 1898 | int_type == SVM_EXITINTINFO_TYPE_SOFT || | ||
| 1899 | (int_type == SVM_EXITINTINFO_TYPE_EXEPT && | ||
| 1900 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) | ||
| 1901 | skip_emulated_instruction(&svm->vcpu); | ||
| 1902 | |||
| 1903 | return kvm_task_switch(&svm->vcpu, tss_selector, reason); | ||
| 1856 | } | 1904 | } |
| 1857 | 1905 | ||
| 1858 | static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1906 | static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
| @@ -1862,6 +1910,14 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1862 | return 1; | 1910 | return 1; |
| 1863 | } | 1911 | } |
| 1864 | 1912 | ||
| 1913 | static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | ||
| 1914 | { | ||
| 1915 | ++svm->vcpu.stat.nmi_window_exits; | ||
| 1916 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); | ||
| 1917 | svm->vcpu.arch.hflags |= HF_IRET_MASK; | ||
| 1918 | return 1; | ||
| 1919 | } | ||
| 1920 | |||
| 1865 | static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1921 | static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
| 1866 | { | 1922 | { |
| 1867 | if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE) | 1923 | if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE) |
| @@ -1879,8 +1935,14 @@ static int emulate_on_interception(struct vcpu_svm *svm, | |||
| 1879 | 1935 | ||
| 1880 | static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1936 | static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
| 1881 | { | 1937 | { |
| 1938 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); | ||
| 1939 | /* instruction emulation calls kvm_set_cr8() */ | ||
| 1882 | emulate_instruction(&svm->vcpu, NULL, 0, 0, 0); | 1940 | emulate_instruction(&svm->vcpu, NULL, 0, 0, 0); |
| 1883 | if (irqchip_in_kernel(svm->vcpu.kvm)) | 1941 | if (irqchip_in_kernel(svm->vcpu.kvm)) { |
| 1942 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | ||
| 1943 | return 1; | ||
| 1944 | } | ||
| 1945 | if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) | ||
| 1884 | return 1; | 1946 | return 1; |
| 1885 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; | 1947 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; |
| 1886 | return 0; | 1948 | return 0; |
| @@ -2090,8 +2152,9 @@ static int interrupt_window_interception(struct vcpu_svm *svm, | |||
| 2090 | * If the user space waits to inject interrupts, exit as soon as | 2152 | * If the user space waits to inject interrupts, exit as soon as |
| 2091 | * possible | 2153 | * possible |
| 2092 | */ | 2154 | */ |
| 2093 | if (kvm_run->request_interrupt_window && | 2155 | if (!irqchip_in_kernel(svm->vcpu.kvm) && |
| 2094 | !svm->vcpu.arch.irq_summary) { | 2156 | kvm_run->request_interrupt_window && |
| 2157 | !kvm_cpu_has_interrupt(&svm->vcpu)) { | ||
| 2095 | ++svm->vcpu.stat.irq_window_exits; | 2158 | ++svm->vcpu.stat.irq_window_exits; |
| 2096 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | 2159 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; |
| 2097 | return 0; | 2160 | return 0; |
| @@ -2134,6 +2197,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | |||
| 2134 | [SVM_EXIT_VINTR] = interrupt_window_interception, | 2197 | [SVM_EXIT_VINTR] = interrupt_window_interception, |
| 2135 | /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ | 2198 | /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ |
| 2136 | [SVM_EXIT_CPUID] = cpuid_interception, | 2199 | [SVM_EXIT_CPUID] = cpuid_interception, |
| 2200 | [SVM_EXIT_IRET] = iret_interception, | ||
| 2137 | [SVM_EXIT_INVD] = emulate_on_interception, | 2201 | [SVM_EXIT_INVD] = emulate_on_interception, |
| 2138 | [SVM_EXIT_HLT] = halt_interception, | 2202 | [SVM_EXIT_HLT] = halt_interception, |
| 2139 | [SVM_EXIT_INVLPG] = invlpg_interception, | 2203 | [SVM_EXIT_INVLPG] = invlpg_interception, |
| @@ -2194,7 +2258,6 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 2194 | } | 2258 | } |
| 2195 | } | 2259 | } |
| 2196 | 2260 | ||
| 2197 | kvm_reput_irq(svm); | ||
| 2198 | 2261 | ||
| 2199 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { | 2262 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { |
| 2200 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 2263 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
| @@ -2205,7 +2268,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 2205 | 2268 | ||
| 2206 | if (is_external_interrupt(svm->vmcb->control.exit_int_info) && | 2269 | if (is_external_interrupt(svm->vmcb->control.exit_int_info) && |
| 2207 | exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && | 2270 | exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && |
| 2208 | exit_code != SVM_EXIT_NPF) | 2271 | exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH) |
| 2209 | printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " | 2272 | printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " |
| 2210 | "exit_code 0x%x\n", | 2273 | "exit_code 0x%x\n", |
| 2211 | __func__, svm->vmcb->control.exit_int_info, | 2274 | __func__, svm->vmcb->control.exit_int_info, |
| @@ -2242,6 +2305,15 @@ static void pre_svm_run(struct vcpu_svm *svm) | |||
| 2242 | new_asid(svm, svm_data); | 2305 | new_asid(svm, svm_data); |
| 2243 | } | 2306 | } |
| 2244 | 2307 | ||
| 2308 | static void svm_inject_nmi(struct kvm_vcpu *vcpu) | ||
| 2309 | { | ||
| 2310 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 2311 | |||
| 2312 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; | ||
| 2313 | vcpu->arch.hflags |= HF_NMI_MASK; | ||
| 2314 | svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); | ||
| 2315 | ++vcpu->stat.nmi_injections; | ||
| 2316 | } | ||
| 2245 | 2317 | ||
| 2246 | static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) | 2318 | static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) |
| 2247 | { | 2319 | { |
| @@ -2257,134 +2329,71 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) | |||
| 2257 | ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); | 2329 | ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); |
| 2258 | } | 2330 | } |
| 2259 | 2331 | ||
| 2260 | static void svm_set_irq(struct kvm_vcpu *vcpu, int irq) | 2332 | static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr) |
| 2261 | { | 2333 | { |
| 2262 | struct vcpu_svm *svm = to_svm(vcpu); | 2334 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2263 | 2335 | ||
| 2264 | nested_svm_intr(svm); | 2336 | svm->vmcb->control.event_inj = nr | |
| 2265 | 2337 | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; | |
| 2266 | svm_inject_irq(svm, irq); | ||
| 2267 | } | 2338 | } |
| 2268 | 2339 | ||
| 2269 | static void update_cr8_intercept(struct kvm_vcpu *vcpu) | 2340 | static void svm_set_irq(struct kvm_vcpu *vcpu) |
| 2270 | { | 2341 | { |
| 2271 | struct vcpu_svm *svm = to_svm(vcpu); | 2342 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2272 | struct vmcb *vmcb = svm->vmcb; | ||
| 2273 | int max_irr, tpr; | ||
| 2274 | 2343 | ||
| 2275 | if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr) | 2344 | nested_svm_intr(svm); |
| 2276 | return; | ||
| 2277 | 2345 | ||
| 2278 | vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | 2346 | svm_queue_irq(vcpu, vcpu->arch.interrupt.nr); |
| 2347 | } | ||
| 2279 | 2348 | ||
| 2280 | max_irr = kvm_lapic_find_highest_irr(vcpu); | 2349 | static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) |
| 2281 | if (max_irr == -1) | 2350 | { |
| 2282 | return; | 2351 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2283 | 2352 | ||
| 2284 | tpr = kvm_lapic_get_cr8(vcpu) << 4; | 2353 | if (irr == -1) |
| 2354 | return; | ||
| 2285 | 2355 | ||
| 2286 | if (tpr >= (max_irr & 0xf0)) | 2356 | if (tpr >= irr) |
| 2287 | vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK; | 2357 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK; |
| 2288 | } | 2358 | } |
| 2289 | 2359 | ||
| 2290 | static void svm_intr_assist(struct kvm_vcpu *vcpu) | 2360 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) |
| 2291 | { | 2361 | { |
| 2292 | struct vcpu_svm *svm = to_svm(vcpu); | 2362 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2293 | struct vmcb *vmcb = svm->vmcb; | 2363 | struct vmcb *vmcb = svm->vmcb; |
| 2294 | int intr_vector = -1; | 2364 | return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && |
| 2295 | 2365 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); | |
| 2296 | if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) && | ||
| 2297 | ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) { | ||
| 2298 | intr_vector = vmcb->control.exit_int_info & | ||
| 2299 | SVM_EVTINJ_VEC_MASK; | ||
| 2300 | vmcb->control.exit_int_info = 0; | ||
| 2301 | svm_inject_irq(svm, intr_vector); | ||
| 2302 | goto out; | ||
| 2303 | } | ||
| 2304 | |||
| 2305 | if (vmcb->control.int_ctl & V_IRQ_MASK) | ||
| 2306 | goto out; | ||
| 2307 | |||
| 2308 | if (!kvm_cpu_has_interrupt(vcpu)) | ||
| 2309 | goto out; | ||
| 2310 | |||
| 2311 | if (nested_svm_intr(svm)) | ||
| 2312 | goto out; | ||
| 2313 | |||
| 2314 | if (!(svm->vcpu.arch.hflags & HF_GIF_MASK)) | ||
| 2315 | goto out; | ||
| 2316 | |||
| 2317 | if (!(vmcb->save.rflags & X86_EFLAGS_IF) || | ||
| 2318 | (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || | ||
| 2319 | (vmcb->control.event_inj & SVM_EVTINJ_VALID)) { | ||
| 2320 | /* unable to deliver irq, set pending irq */ | ||
| 2321 | svm_set_vintr(svm); | ||
| 2322 | svm_inject_irq(svm, 0x0); | ||
| 2323 | goto out; | ||
| 2324 | } | ||
| 2325 | /* Okay, we can deliver the interrupt: grab it and update PIC state. */ | ||
| 2326 | intr_vector = kvm_cpu_get_interrupt(vcpu); | ||
| 2327 | svm_inject_irq(svm, intr_vector); | ||
| 2328 | out: | ||
| 2329 | update_cr8_intercept(vcpu); | ||
| 2330 | } | 2366 | } |
| 2331 | 2367 | ||
| 2332 | static void kvm_reput_irq(struct vcpu_svm *svm) | 2368 | static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) |
| 2333 | { | 2369 | { |
| 2334 | struct vmcb_control_area *control = &svm->vmcb->control; | 2370 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2335 | 2371 | struct vmcb *vmcb = svm->vmcb; | |
| 2336 | if ((control->int_ctl & V_IRQ_MASK) | 2372 | return (vmcb->save.rflags & X86_EFLAGS_IF) && |
| 2337 | && !irqchip_in_kernel(svm->vcpu.kvm)) { | 2373 | !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && |
| 2338 | control->int_ctl &= ~V_IRQ_MASK; | 2374 | (svm->vcpu.arch.hflags & HF_GIF_MASK); |
| 2339 | push_irq(&svm->vcpu, control->int_vector); | ||
| 2340 | } | ||
| 2341 | |||
| 2342 | svm->vcpu.arch.interrupt_window_open = | ||
| 2343 | !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && | ||
| 2344 | (svm->vcpu.arch.hflags & HF_GIF_MASK); | ||
| 2345 | } | 2375 | } |
| 2346 | 2376 | ||
| 2347 | static void svm_do_inject_vector(struct vcpu_svm *svm) | 2377 | static void enable_irq_window(struct kvm_vcpu *vcpu) |
| 2348 | { | 2378 | { |
| 2349 | struct kvm_vcpu *vcpu = &svm->vcpu; | 2379 | svm_set_vintr(to_svm(vcpu)); |
| 2350 | int word_index = __ffs(vcpu->arch.irq_summary); | 2380 | svm_inject_irq(to_svm(vcpu), 0x0); |
| 2351 | int bit_index = __ffs(vcpu->arch.irq_pending[word_index]); | ||
| 2352 | int irq = word_index * BITS_PER_LONG + bit_index; | ||
| 2353 | |||
| 2354 | clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]); | ||
| 2355 | if (!vcpu->arch.irq_pending[word_index]) | ||
| 2356 | clear_bit(word_index, &vcpu->arch.irq_summary); | ||
| 2357 | svm_inject_irq(svm, irq); | ||
| 2358 | } | 2381 | } |
| 2359 | 2382 | ||
| 2360 | static void do_interrupt_requests(struct kvm_vcpu *vcpu, | 2383 | static void enable_nmi_window(struct kvm_vcpu *vcpu) |
| 2361 | struct kvm_run *kvm_run) | ||
| 2362 | { | 2384 | { |
| 2363 | struct vcpu_svm *svm = to_svm(vcpu); | 2385 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2364 | struct vmcb_control_area *control = &svm->vmcb->control; | ||
| 2365 | |||
| 2366 | if (nested_svm_intr(svm)) | ||
| 2367 | return; | ||
| 2368 | 2386 | ||
| 2369 | svm->vcpu.arch.interrupt_window_open = | 2387 | if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) |
| 2370 | (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && | 2388 | == HF_NMI_MASK) |
| 2371 | (svm->vmcb->save.rflags & X86_EFLAGS_IF) && | 2389 | return; /* IRET will cause a vm exit */ |
| 2372 | (svm->vcpu.arch.hflags & HF_GIF_MASK)); | ||
| 2373 | 2390 | ||
| 2374 | if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary) | 2391 | /* Something prevents NMI from being injected. Single step over the |
| 2375 | /* | 2392 | possible problem (IRET or exception injection or interrupt |
| 2376 | * If interrupts enabled, and not blocked by sti or mov ss. Good. | 2393 | shadow) */ |
| 2377 | */ | 2394 | vcpu->arch.singlestep = true; |
| 2378 | svm_do_inject_vector(svm); | 2395 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
| 2379 | 2396 | update_db_intercept(vcpu); | |
| 2380 | /* | ||
| 2381 | * Interrupts blocked. Wait for unblock. | ||
| 2382 | */ | ||
| 2383 | if (!svm->vcpu.arch.interrupt_window_open && | ||
| 2384 | (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window)) | ||
| 2385 | svm_set_vintr(svm); | ||
| 2386 | else | ||
| 2387 | svm_clear_vintr(svm); | ||
| 2388 | } | 2397 | } |
| 2389 | 2398 | ||
| 2390 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) | 2399 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) |
| @@ -2407,7 +2416,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) | |||
| 2407 | 2416 | ||
| 2408 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { | 2417 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { |
| 2409 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; | 2418 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; |
| 2410 | kvm_lapic_set_tpr(vcpu, cr8); | 2419 | kvm_set_cr8(vcpu, cr8); |
| 2411 | } | 2420 | } |
| 2412 | } | 2421 | } |
| 2413 | 2422 | ||
| @@ -2416,14 +2425,54 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) | |||
| 2416 | struct vcpu_svm *svm = to_svm(vcpu); | 2425 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2417 | u64 cr8; | 2426 | u64 cr8; |
| 2418 | 2427 | ||
| 2419 | if (!irqchip_in_kernel(vcpu->kvm)) | ||
| 2420 | return; | ||
| 2421 | |||
| 2422 | cr8 = kvm_get_cr8(vcpu); | 2428 | cr8 = kvm_get_cr8(vcpu); |
| 2423 | svm->vmcb->control.int_ctl &= ~V_TPR_MASK; | 2429 | svm->vmcb->control.int_ctl &= ~V_TPR_MASK; |
| 2424 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; | 2430 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; |
| 2425 | } | 2431 | } |
| 2426 | 2432 | ||
| 2433 | static void svm_complete_interrupts(struct vcpu_svm *svm) | ||
| 2434 | { | ||
| 2435 | u8 vector; | ||
| 2436 | int type; | ||
| 2437 | u32 exitintinfo = svm->vmcb->control.exit_int_info; | ||
| 2438 | |||
| 2439 | if (svm->vcpu.arch.hflags & HF_IRET_MASK) | ||
| 2440 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); | ||
| 2441 | |||
| 2442 | svm->vcpu.arch.nmi_injected = false; | ||
| 2443 | kvm_clear_exception_queue(&svm->vcpu); | ||
| 2444 | kvm_clear_interrupt_queue(&svm->vcpu); | ||
| 2445 | |||
| 2446 | if (!(exitintinfo & SVM_EXITINTINFO_VALID)) | ||
| 2447 | return; | ||
| 2448 | |||
| 2449 | vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK; | ||
| 2450 | type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK; | ||
| 2451 | |||
| 2452 | switch (type) { | ||
| 2453 | case SVM_EXITINTINFO_TYPE_NMI: | ||
| 2454 | svm->vcpu.arch.nmi_injected = true; | ||
| 2455 | break; | ||
| 2456 | case SVM_EXITINTINFO_TYPE_EXEPT: | ||
| 2457 | /* In case of software exception do not reinject an exception | ||
| 2458 | vector, but re-execute the instruction instead */ | ||
| 2459 | if (kvm_exception_is_soft(vector)) | ||
| 2460 | break; | ||
| 2461 | if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { | ||
| 2462 | u32 err = svm->vmcb->control.exit_int_info_err; | ||
| 2463 | kvm_queue_exception_e(&svm->vcpu, vector, err); | ||
| 2464 | |||
| 2465 | } else | ||
| 2466 | kvm_queue_exception(&svm->vcpu, vector); | ||
| 2467 | break; | ||
| 2468 | case SVM_EXITINTINFO_TYPE_INTR: | ||
| 2469 | kvm_queue_interrupt(&svm->vcpu, vector, false); | ||
| 2470 | break; | ||
| 2471 | default: | ||
| 2472 | break; | ||
| 2473 | } | ||
| 2474 | } | ||
| 2475 | |||
| 2427 | #ifdef CONFIG_X86_64 | 2476 | #ifdef CONFIG_X86_64 |
| 2428 | #define R "r" | 2477 | #define R "r" |
| 2429 | #else | 2478 | #else |
| @@ -2552,6 +2601,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2552 | sync_cr8_to_lapic(vcpu); | 2601 | sync_cr8_to_lapic(vcpu); |
| 2553 | 2602 | ||
| 2554 | svm->next_rip = 0; | 2603 | svm->next_rip = 0; |
| 2604 | |||
| 2605 | svm_complete_interrupts(svm); | ||
| 2555 | } | 2606 | } |
| 2556 | 2607 | ||
| 2557 | #undef R | 2608 | #undef R |
| @@ -2617,7 +2668,7 @@ static int get_npt_level(void) | |||
| 2617 | #endif | 2668 | #endif |
| 2618 | } | 2669 | } |
| 2619 | 2670 | ||
| 2620 | static int svm_get_mt_mask_shift(void) | 2671 | static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) |
| 2621 | { | 2672 | { |
| 2622 | return 0; | 2673 | return 0; |
| 2623 | } | 2674 | } |
| @@ -2667,17 +2718,21 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
| 2667 | .run = svm_vcpu_run, | 2718 | .run = svm_vcpu_run, |
| 2668 | .handle_exit = handle_exit, | 2719 | .handle_exit = handle_exit, |
| 2669 | .skip_emulated_instruction = skip_emulated_instruction, | 2720 | .skip_emulated_instruction = skip_emulated_instruction, |
| 2721 | .set_interrupt_shadow = svm_set_interrupt_shadow, | ||
| 2722 | .get_interrupt_shadow = svm_get_interrupt_shadow, | ||
| 2670 | .patch_hypercall = svm_patch_hypercall, | 2723 | .patch_hypercall = svm_patch_hypercall, |
| 2671 | .get_irq = svm_get_irq, | ||
| 2672 | .set_irq = svm_set_irq, | 2724 | .set_irq = svm_set_irq, |
| 2725 | .set_nmi = svm_inject_nmi, | ||
| 2673 | .queue_exception = svm_queue_exception, | 2726 | .queue_exception = svm_queue_exception, |
| 2674 | .exception_injected = svm_exception_injected, | 2727 | .interrupt_allowed = svm_interrupt_allowed, |
| 2675 | .inject_pending_irq = svm_intr_assist, | 2728 | .nmi_allowed = svm_nmi_allowed, |
| 2676 | .inject_pending_vectors = do_interrupt_requests, | 2729 | .enable_nmi_window = enable_nmi_window, |
| 2730 | .enable_irq_window = enable_irq_window, | ||
| 2731 | .update_cr8_intercept = update_cr8_intercept, | ||
| 2677 | 2732 | ||
| 2678 | .set_tss_addr = svm_set_tss_addr, | 2733 | .set_tss_addr = svm_set_tss_addr, |
| 2679 | .get_tdp_level = get_npt_level, | 2734 | .get_tdp_level = get_npt_level, |
| 2680 | .get_mt_mask_shift = svm_get_mt_mask_shift, | 2735 | .get_mt_mask = svm_get_mt_mask, |
| 2681 | }; | 2736 | }; |
| 2682 | 2737 | ||
| 2683 | static int __init svm_init(void) | 2738 | static int __init svm_init(void) |
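Note on the SVM hunks above: the old push_irq()/kvm_reput_irq() bookkeeping is replaced by per-vendor callbacks (interrupt_allowed, nmi_allowed, set_irq, set_nmi, enable_irq_window, enable_nmi_window, update_cr8_intercept), with svm_complete_interrupts() re-queueing whatever was in flight at VM-exit time. As a hedged illustration only, the arch-common entry path in arch/x86/kvm/x86.c (not part of this diff) would be expected to drive these hooks roughly as follows; the function name and the nmi_pending field are assumptions, not quoted from the patch:

	/* Illustrative sketch -- not part of the patch above. */
	static void inject_pending_event(struct kvm_vcpu *vcpu)
	{
		/* NMIs first, only when the vendor code reports an open window. */
		if (vcpu->arch.nmi_pending) {
			if (kvm_x86_ops->nmi_allowed(vcpu)) {
				vcpu->arch.nmi_pending = false;
				vcpu->arch.nmi_injected = true;
				kvm_x86_ops->set_nmi(vcpu);
			} else
				kvm_x86_ops->enable_nmi_window(vcpu);
			return;
		}

		/* Then ordinary interrupts, gated on interrupt_allowed(). */
		if (kvm_cpu_has_interrupt(vcpu)) {
			if (kvm_x86_ops->interrupt_allowed(vcpu)) {
				kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
						    false);
				kvm_x86_ops->set_irq(vcpu);
			} else
				kvm_x86_ops->enable_irq_window(vcpu);
		}
	}

With this split, svm.c only answers "may I inject right now?" and "make the hardware tell me when I may", while the decision of what to inject stays in common code.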
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c new file mode 100644 index 000000000000..86dbac072d0c --- /dev/null +++ b/arch/x86/kvm/timer.c | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | #include <linux/kvm_host.h> | ||
| 2 | #include <linux/kvm.h> | ||
| 3 | #include <linux/hrtimer.h> | ||
| 4 | #include <asm/atomic.h> | ||
| 5 | #include "kvm_timer.h" | ||
| 6 | |||
| 7 | static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | ||
| 8 | { | ||
| 9 | int restart_timer = 0; | ||
| 10 | wait_queue_head_t *q = &vcpu->wq; | ||
| 11 | |||
| 12 | /* FIXME: this code should not know anything about vcpus */ | ||
| 13 | if (!atomic_inc_and_test(&ktimer->pending)) | ||
| 14 | set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); | ||
| 15 | |||
| 16 | if (!ktimer->reinject) | ||
| 17 | atomic_set(&ktimer->pending, 1); | ||
| 18 | |||
| 19 | if (waitqueue_active(q)) | ||
| 20 | wake_up_interruptible(q); | ||
| 21 | |||
| 22 | if (ktimer->t_ops->is_periodic(ktimer)) { | ||
| 23 | hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); | ||
| 24 | restart_timer = 1; | ||
| 25 | } | ||
| 26 | |||
| 27 | return restart_timer; | ||
| 28 | } | ||
| 29 | |||
| 30 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data) | ||
| 31 | { | ||
| 32 | int restart_timer; | ||
| 33 | struct kvm_vcpu *vcpu; | ||
| 34 | struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); | ||
| 35 | |||
| 36 | vcpu = ktimer->kvm->vcpus[ktimer->vcpu_id]; | ||
| 37 | if (!vcpu) | ||
| 38 | return HRTIMER_NORESTART; | ||
| 39 | |||
| 40 | restart_timer = __kvm_timer_fn(vcpu, ktimer); | ||
| 41 | if (restart_timer) | ||
| 42 | return HRTIMER_RESTART; | ||
| 43 | else | ||
| 44 | return HRTIMER_NORESTART; | ||
| 45 | } | ||
| 46 | |||
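The new timer.c above only supplies the shared hrtimer callback; struct kvm_timer itself (pending, reinject, period, t_ops, kvm, vcpu_id) lives in kvm_timer.h, which is not part of this hunk. A minimal sketch of how a timer-emulating device (e.g. the local APIC timer) might arm it, assuming a struct kvm_timer_ops that provides the is_periodic() hook used above; the helper name is hypothetical:

	/* Illustrative sketch -- not part of the patch: arming a kvm_timer. */
	static void start_guest_timer(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer,
				      struct kvm_timer_ops *ops, s64 period_ns)
	{
		hrtimer_init(&ktimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
		ktimer->timer.function = kvm_timer_fn;	/* shared callback above */
		ktimer->t_ops = ops;			/* must supply ->is_periodic() */
		ktimer->kvm = vcpu->kvm;
		ktimer->vcpu_id = vcpu->vcpu_id;
		ktimer->period = period_ns;
		ktimer->reinject = true;		/* let missed ticks accumulate in ->pending */
		atomic_set(&ktimer->pending, 0);
		hrtimer_start(&ktimer->timer,
			      ktime_add_ns(ktime_get(), period_ns), HRTIMER_MODE_ABS);
	}

kvm_timer_fn() then wakes the vCPU's wait queue and either re-arms the hrtimer (periodic mode) or lets it expire once (one-shot).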
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bb481330716f..32d6ae8fb60e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -32,26 +32,27 @@ | |||
| 32 | #include <asm/desc.h> | 32 | #include <asm/desc.h> |
| 33 | #include <asm/vmx.h> | 33 | #include <asm/vmx.h> |
| 34 | #include <asm/virtext.h> | 34 | #include <asm/virtext.h> |
| 35 | #include <asm/mce.h> | ||
| 35 | 36 | ||
| 36 | #define __ex(x) __kvm_handle_fault_on_reboot(x) | 37 | #define __ex(x) __kvm_handle_fault_on_reboot(x) |
| 37 | 38 | ||
| 38 | MODULE_AUTHOR("Qumranet"); | 39 | MODULE_AUTHOR("Qumranet"); |
| 39 | MODULE_LICENSE("GPL"); | 40 | MODULE_LICENSE("GPL"); |
| 40 | 41 | ||
| 41 | static int bypass_guest_pf = 1; | 42 | static int __read_mostly bypass_guest_pf = 1; |
| 42 | module_param(bypass_guest_pf, bool, 0); | 43 | module_param(bypass_guest_pf, bool, S_IRUGO); |
| 43 | 44 | ||
| 44 | static int enable_vpid = 1; | 45 | static int __read_mostly enable_vpid = 1; |
| 45 | module_param(enable_vpid, bool, 0); | 46 | module_param_named(vpid, enable_vpid, bool, 0444); |
| 46 | 47 | ||
| 47 | static int flexpriority_enabled = 1; | 48 | static int __read_mostly flexpriority_enabled = 1; |
| 48 | module_param(flexpriority_enabled, bool, 0); | 49 | module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); |
| 49 | 50 | ||
| 50 | static int enable_ept = 1; | 51 | static int __read_mostly enable_ept = 1; |
| 51 | module_param(enable_ept, bool, 0); | 52 | module_param_named(ept, enable_ept, bool, S_IRUGO); |
| 52 | 53 | ||
| 53 | static int emulate_invalid_guest_state = 0; | 54 | static int __read_mostly emulate_invalid_guest_state = 0; |
| 54 | module_param(emulate_invalid_guest_state, bool, 0); | 55 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
| 55 | 56 | ||
| 56 | struct vmcs { | 57 | struct vmcs { |
| 57 | u32 revision_id; | 58 | u32 revision_id; |
| @@ -97,6 +98,7 @@ struct vcpu_vmx { | |||
| 97 | int soft_vnmi_blocked; | 98 | int soft_vnmi_blocked; |
| 98 | ktime_t entry_time; | 99 | ktime_t entry_time; |
| 99 | s64 vnmi_blocked_time; | 100 | s64 vnmi_blocked_time; |
| 101 | u32 exit_reason; | ||
| 100 | }; | 102 | }; |
| 101 | 103 | ||
| 102 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | 104 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) |
| @@ -111,9 +113,10 @@ static DEFINE_PER_CPU(struct vmcs *, vmxarea); | |||
| 111 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 113 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
| 112 | static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); | 114 | static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); |
| 113 | 115 | ||
| 114 | static struct page *vmx_io_bitmap_a; | 116 | static unsigned long *vmx_io_bitmap_a; |
| 115 | static struct page *vmx_io_bitmap_b; | 117 | static unsigned long *vmx_io_bitmap_b; |
| 116 | static struct page *vmx_msr_bitmap; | 118 | static unsigned long *vmx_msr_bitmap_legacy; |
| 119 | static unsigned long *vmx_msr_bitmap_longmode; | ||
| 117 | 120 | ||
| 118 | static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); | 121 | static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); |
| 119 | static DEFINE_SPINLOCK(vmx_vpid_lock); | 122 | static DEFINE_SPINLOCK(vmx_vpid_lock); |
| @@ -213,70 +216,78 @@ static inline int is_external_interrupt(u32 intr_info) | |||
| 213 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); | 216 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); |
| 214 | } | 217 | } |
| 215 | 218 | ||
| 219 | static inline int is_machine_check(u32 intr_info) | ||
| 220 | { | ||
| 221 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | ||
| 222 | INTR_INFO_VALID_MASK)) == | ||
| 223 | (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); | ||
| 224 | } | ||
| 225 | |||
| 216 | static inline int cpu_has_vmx_msr_bitmap(void) | 226 | static inline int cpu_has_vmx_msr_bitmap(void) |
| 217 | { | 227 | { |
| 218 | return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS); | 228 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; |
| 219 | } | 229 | } |
| 220 | 230 | ||
| 221 | static inline int cpu_has_vmx_tpr_shadow(void) | 231 | static inline int cpu_has_vmx_tpr_shadow(void) |
| 222 | { | 232 | { |
| 223 | return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW); | 233 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; |
| 224 | } | 234 | } |
| 225 | 235 | ||
| 226 | static inline int vm_need_tpr_shadow(struct kvm *kvm) | 236 | static inline int vm_need_tpr_shadow(struct kvm *kvm) |
| 227 | { | 237 | { |
| 228 | return ((cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm))); | 238 | return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); |
| 229 | } | 239 | } |
| 230 | 240 | ||
| 231 | static inline int cpu_has_secondary_exec_ctrls(void) | 241 | static inline int cpu_has_secondary_exec_ctrls(void) |
| 232 | { | 242 | { |
| 233 | return (vmcs_config.cpu_based_exec_ctrl & | 243 | return vmcs_config.cpu_based_exec_ctrl & |
| 234 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); | 244 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
| 235 | } | 245 | } |
| 236 | 246 | ||
| 237 | static inline bool cpu_has_vmx_virtualize_apic_accesses(void) | 247 | static inline bool cpu_has_vmx_virtualize_apic_accesses(void) |
| 238 | { | 248 | { |
| 239 | return flexpriority_enabled | 249 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
| 240 | && (vmcs_config.cpu_based_2nd_exec_ctrl & | 250 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
| 241 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); | 251 | } |
| 252 | |||
| 253 | static inline bool cpu_has_vmx_flexpriority(void) | ||
| 254 | { | ||
| 255 | return cpu_has_vmx_tpr_shadow() && | ||
| 256 | cpu_has_vmx_virtualize_apic_accesses(); | ||
| 242 | } | 257 | } |
| 243 | 258 | ||
| 244 | static inline int cpu_has_vmx_invept_individual_addr(void) | 259 | static inline int cpu_has_vmx_invept_individual_addr(void) |
| 245 | { | 260 | { |
| 246 | return (!!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT)); | 261 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); |
| 247 | } | 262 | } |
| 248 | 263 | ||
| 249 | static inline int cpu_has_vmx_invept_context(void) | 264 | static inline int cpu_has_vmx_invept_context(void) |
| 250 | { | 265 | { |
| 251 | return (!!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT)); | 266 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT); |
| 252 | } | 267 | } |
| 253 | 268 | ||
| 254 | static inline int cpu_has_vmx_invept_global(void) | 269 | static inline int cpu_has_vmx_invept_global(void) |
| 255 | { | 270 | { |
| 256 | return (!!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT)); | 271 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT); |
| 257 | } | 272 | } |
| 258 | 273 | ||
| 259 | static inline int cpu_has_vmx_ept(void) | 274 | static inline int cpu_has_vmx_ept(void) |
| 260 | { | 275 | { |
| 261 | return (vmcs_config.cpu_based_2nd_exec_ctrl & | 276 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
| 262 | SECONDARY_EXEC_ENABLE_EPT); | 277 | SECONDARY_EXEC_ENABLE_EPT; |
| 263 | } | ||
| 264 | |||
| 265 | static inline int vm_need_ept(void) | ||
| 266 | { | ||
| 267 | return (cpu_has_vmx_ept() && enable_ept); | ||
| 268 | } | 278 | } |
| 269 | 279 | ||
| 270 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 280 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) |
| 271 | { | 281 | { |
| 272 | return ((cpu_has_vmx_virtualize_apic_accesses()) && | 282 | return flexpriority_enabled && |
| 273 | (irqchip_in_kernel(kvm))); | 283 | (cpu_has_vmx_virtualize_apic_accesses()) && |
| 284 | (irqchip_in_kernel(kvm)); | ||
| 274 | } | 285 | } |
| 275 | 286 | ||
| 276 | static inline int cpu_has_vmx_vpid(void) | 287 | static inline int cpu_has_vmx_vpid(void) |
| 277 | { | 288 | { |
| 278 | return (vmcs_config.cpu_based_2nd_exec_ctrl & | 289 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
| 279 | SECONDARY_EXEC_ENABLE_VPID); | 290 | SECONDARY_EXEC_ENABLE_VPID; |
| 280 | } | 291 | } |
| 281 | 292 | ||
| 282 | static inline int cpu_has_virtual_nmis(void) | 293 | static inline int cpu_has_virtual_nmis(void) |
| @@ -284,6 +295,11 @@ static inline int cpu_has_virtual_nmis(void) | |||
| 284 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | 295 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; |
| 285 | } | 296 | } |
| 286 | 297 | ||
| 298 | static inline bool report_flexpriority(void) | ||
| 299 | { | ||
| 300 | return flexpriority_enabled; | ||
| 301 | } | ||
| 302 | |||
| 287 | static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) | 303 | static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) |
| 288 | { | 304 | { |
| 289 | int i; | 305 | int i; |
| @@ -381,7 +397,7 @@ static inline void ept_sync_global(void) | |||
| 381 | 397 | ||
| 382 | static inline void ept_sync_context(u64 eptp) | 398 | static inline void ept_sync_context(u64 eptp) |
| 383 | { | 399 | { |
| 384 | if (vm_need_ept()) { | 400 | if (enable_ept) { |
| 385 | if (cpu_has_vmx_invept_context()) | 401 | if (cpu_has_vmx_invept_context()) |
| 386 | __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0); | 402 | __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0); |
| 387 | else | 403 | else |
| @@ -391,7 +407,7 @@ static inline void ept_sync_context(u64 eptp) | |||
| 391 | 407 | ||
| 392 | static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) | 408 | static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) |
| 393 | { | 409 | { |
| 394 | if (vm_need_ept()) { | 410 | if (enable_ept) { |
| 395 | if (cpu_has_vmx_invept_individual_addr()) | 411 | if (cpu_has_vmx_invept_individual_addr()) |
| 396 | __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR, | 412 | __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR, |
| 397 | eptp, gpa); | 413 | eptp, gpa); |
| @@ -478,7 +494,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
| 478 | { | 494 | { |
| 479 | u32 eb; | 495 | u32 eb; |
| 480 | 496 | ||
| 481 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR); | 497 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR); |
| 482 | if (!vcpu->fpu_active) | 498 | if (!vcpu->fpu_active) |
| 483 | eb |= 1u << NM_VECTOR; | 499 | eb |= 1u << NM_VECTOR; |
| 484 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { | 500 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { |
| @@ -488,9 +504,9 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
| 488 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | 504 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) |
| 489 | eb |= 1u << BP_VECTOR; | 505 | eb |= 1u << BP_VECTOR; |
| 490 | } | 506 | } |
| 491 | if (vcpu->arch.rmode.active) | 507 | if (vcpu->arch.rmode.vm86_active) |
| 492 | eb = ~0; | 508 | eb = ~0; |
| 493 | if (vm_need_ept()) | 509 | if (enable_ept) |
| 494 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ | 510 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ |
| 495 | vmcs_write32(EXCEPTION_BITMAP, eb); | 511 | vmcs_write32(EXCEPTION_BITMAP, eb); |
| 496 | } | 512 | } |
| @@ -724,29 +740,50 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | |||
| 724 | 740 | ||
| 725 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 741 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
| 726 | { | 742 | { |
| 727 | if (vcpu->arch.rmode.active) | 743 | if (vcpu->arch.rmode.vm86_active) |
| 728 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; | 744 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; |
| 729 | vmcs_writel(GUEST_RFLAGS, rflags); | 745 | vmcs_writel(GUEST_RFLAGS, rflags); |
| 730 | } | 746 | } |
| 731 | 747 | ||
| 748 | static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | ||
| 749 | { | ||
| 750 | u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | ||
| 751 | int ret = 0; | ||
| 752 | |||
| 753 | if (interruptibility & GUEST_INTR_STATE_STI) | ||
| 754 | ret |= X86_SHADOW_INT_STI; | ||
| 755 | if (interruptibility & GUEST_INTR_STATE_MOV_SS) | ||
| 756 | ret |= X86_SHADOW_INT_MOV_SS; | ||
| 757 | |||
| 758 | return ret & mask; | ||
| 759 | } | ||
| 760 | |||
| 761 | static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | ||
| 762 | { | ||
| 763 | u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | ||
| 764 | u32 interruptibility = interruptibility_old; | ||
| 765 | |||
| 766 | interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); | ||
| 767 | |||
| 768 | if (mask & X86_SHADOW_INT_MOV_SS) | ||
| 769 | interruptibility |= GUEST_INTR_STATE_MOV_SS; | ||
| 770 | if (mask & X86_SHADOW_INT_STI) | ||
| 771 | interruptibility |= GUEST_INTR_STATE_STI; | ||
| 772 | |||
| 773 | if ((interruptibility != interruptibility_old)) | ||
| 774 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility); | ||
| 775 | } | ||
| 776 | |||
| 732 | static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | 777 | static void skip_emulated_instruction(struct kvm_vcpu *vcpu) |
| 733 | { | 778 | { |
| 734 | unsigned long rip; | 779 | unsigned long rip; |
| 735 | u32 interruptibility; | ||
| 736 | 780 | ||
| 737 | rip = kvm_rip_read(vcpu); | 781 | rip = kvm_rip_read(vcpu); |
| 738 | rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | 782 | rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); |
| 739 | kvm_rip_write(vcpu, rip); | 783 | kvm_rip_write(vcpu, rip); |
| 740 | 784 | ||
| 741 | /* | 785 | /* skipping an emulated instruction also counts */ |
| 742 | * We emulated an instruction, so temporary interrupt blocking | 786 | vmx_set_interrupt_shadow(vcpu, 0); |
| 743 | * should be removed, if set. | ||
| 744 | */ | ||
| 745 | interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | ||
| 746 | if (interruptibility & 3) | ||
| 747 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, | ||
| 748 | interruptibility & ~3); | ||
| 749 | vcpu->arch.interrupt_window_open = 1; | ||
| 750 | } | 787 | } |
| 751 | 788 | ||
| 752 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | 789 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, |
| @@ -760,7 +797,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
| 760 | intr_info |= INTR_INFO_DELIVER_CODE_MASK; | 797 | intr_info |= INTR_INFO_DELIVER_CODE_MASK; |
| 761 | } | 798 | } |
| 762 | 799 | ||
| 763 | if (vcpu->arch.rmode.active) { | 800 | if (vcpu->arch.rmode.vm86_active) { |
| 764 | vmx->rmode.irq.pending = true; | 801 | vmx->rmode.irq.pending = true; |
| 765 | vmx->rmode.irq.vector = nr; | 802 | vmx->rmode.irq.vector = nr; |
| 766 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); | 803 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); |
| @@ -773,8 +810,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
| 773 | return; | 810 | return; |
| 774 | } | 811 | } |
| 775 | 812 | ||
| 776 | if (nr == BP_VECTOR || nr == OF_VECTOR) { | 813 | if (kvm_exception_is_soft(nr)) { |
| 777 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); | 814 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, |
| 815 | vmx->vcpu.arch.event_exit_inst_len); | ||
| 778 | intr_info |= INTR_TYPE_SOFT_EXCEPTION; | 816 | intr_info |= INTR_TYPE_SOFT_EXCEPTION; |
| 779 | } else | 817 | } else |
| 780 | intr_info |= INTR_TYPE_HARD_EXCEPTION; | 818 | intr_info |= INTR_TYPE_HARD_EXCEPTION; |
| @@ -782,11 +820,6 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
| 782 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); | 820 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); |
| 783 | } | 821 | } |
| 784 | 822 | ||
| 785 | static bool vmx_exception_injected(struct kvm_vcpu *vcpu) | ||
| 786 | { | ||
| 787 | return false; | ||
| 788 | } | ||
| 789 | |||
| 790 | /* | 823 | /* |
| 791 | * Swap MSR entry in host/guest MSR entry array. | 824 | * Swap MSR entry in host/guest MSR entry array. |
| 792 | */ | 825 | */ |
| @@ -812,6 +845,7 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | |||
| 812 | static void setup_msrs(struct vcpu_vmx *vmx) | 845 | static void setup_msrs(struct vcpu_vmx *vmx) |
| 813 | { | 846 | { |
| 814 | int save_nmsrs; | 847 | int save_nmsrs; |
| 848 | unsigned long *msr_bitmap; | ||
| 815 | 849 | ||
| 816 | vmx_load_host_state(vmx); | 850 | vmx_load_host_state(vmx); |
| 817 | save_nmsrs = 0; | 851 | save_nmsrs = 0; |
| @@ -847,6 +881,15 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
| 847 | __find_msr_index(vmx, MSR_KERNEL_GS_BASE); | 881 | __find_msr_index(vmx, MSR_KERNEL_GS_BASE); |
| 848 | #endif | 882 | #endif |
| 849 | vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER); | 883 | vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER); |
| 884 | |||
| 885 | if (cpu_has_vmx_msr_bitmap()) { | ||
| 886 | if (is_long_mode(&vmx->vcpu)) | ||
| 887 | msr_bitmap = vmx_msr_bitmap_longmode; | ||
| 888 | else | ||
| 889 | msr_bitmap = vmx_msr_bitmap_legacy; | ||
| 890 | |||
| 891 | vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); | ||
| 892 | } | ||
| 850 | } | 893 | } |
| 851 | 894 | ||
| 852 | /* | 895 | /* |
| @@ -1034,13 +1077,6 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | |||
| 1034 | return 0; | 1077 | return 0; |
| 1035 | } | 1078 | } |
| 1036 | 1079 | ||
| 1037 | static int vmx_get_irq(struct kvm_vcpu *vcpu) | ||
| 1038 | { | ||
| 1039 | if (!vcpu->arch.interrupt.pending) | ||
| 1040 | return -1; | ||
| 1041 | return vcpu->arch.interrupt.nr; | ||
| 1042 | } | ||
| 1043 | |||
| 1044 | static __init int cpu_has_kvm_support(void) | 1080 | static __init int cpu_has_kvm_support(void) |
| 1045 | { | 1081 | { |
| 1046 | return cpu_has_vmx(); | 1082 | return cpu_has_vmx(); |
| @@ -1294,6 +1330,18 @@ static __init int hardware_setup(void) | |||
| 1294 | if (boot_cpu_has(X86_FEATURE_NX)) | 1330 | if (boot_cpu_has(X86_FEATURE_NX)) |
| 1295 | kvm_enable_efer_bits(EFER_NX); | 1331 | kvm_enable_efer_bits(EFER_NX); |
| 1296 | 1332 | ||
| 1333 | if (!cpu_has_vmx_vpid()) | ||
| 1334 | enable_vpid = 0; | ||
| 1335 | |||
| 1336 | if (!cpu_has_vmx_ept()) | ||
| 1337 | enable_ept = 0; | ||
| 1338 | |||
| 1339 | if (!cpu_has_vmx_flexpriority()) | ||
| 1340 | flexpriority_enabled = 0; | ||
| 1341 | |||
| 1342 | if (!cpu_has_vmx_tpr_shadow()) | ||
| 1343 | kvm_x86_ops->update_cr8_intercept = NULL; | ||
| 1344 | |||
| 1297 | return alloc_kvm_area(); | 1345 | return alloc_kvm_area(); |
| 1298 | } | 1346 | } |
| 1299 | 1347 | ||
| @@ -1324,7 +1372,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
| 1324 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1372 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 1325 | 1373 | ||
| 1326 | vmx->emulation_required = 1; | 1374 | vmx->emulation_required = 1; |
| 1327 | vcpu->arch.rmode.active = 0; | 1375 | vcpu->arch.rmode.vm86_active = 0; |
| 1328 | 1376 | ||
| 1329 | vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base); | 1377 | vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base); |
| 1330 | vmcs_write32(GUEST_TR_LIMIT, vcpu->arch.rmode.tr.limit); | 1378 | vmcs_write32(GUEST_TR_LIMIT, vcpu->arch.rmode.tr.limit); |
| @@ -1386,7 +1434,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
| 1386 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1434 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 1387 | 1435 | ||
| 1388 | vmx->emulation_required = 1; | 1436 | vmx->emulation_required = 1; |
| 1389 | vcpu->arch.rmode.active = 1; | 1437 | vcpu->arch.rmode.vm86_active = 1; |
| 1390 | 1438 | ||
| 1391 | vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE); | 1439 | vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE); |
| 1392 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); | 1440 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); |
| @@ -1485,7 +1533,7 @@ static void exit_lmode(struct kvm_vcpu *vcpu) | |||
| 1485 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | 1533 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) |
| 1486 | { | 1534 | { |
| 1487 | vpid_sync_vcpu_all(to_vmx(vcpu)); | 1535 | vpid_sync_vcpu_all(to_vmx(vcpu)); |
| 1488 | if (vm_need_ept()) | 1536 | if (enable_ept) |
| 1489 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); | 1537 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); |
| 1490 | } | 1538 | } |
| 1491 | 1539 | ||
| @@ -1555,10 +1603,10 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
| 1555 | 1603 | ||
| 1556 | vmx_fpu_deactivate(vcpu); | 1604 | vmx_fpu_deactivate(vcpu); |
| 1557 | 1605 | ||
| 1558 | if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE)) | 1606 | if (vcpu->arch.rmode.vm86_active && (cr0 & X86_CR0_PE)) |
| 1559 | enter_pmode(vcpu); | 1607 | enter_pmode(vcpu); |
| 1560 | 1608 | ||
| 1561 | if (!vcpu->arch.rmode.active && !(cr0 & X86_CR0_PE)) | 1609 | if (!vcpu->arch.rmode.vm86_active && !(cr0 & X86_CR0_PE)) |
| 1562 | enter_rmode(vcpu); | 1610 | enter_rmode(vcpu); |
| 1563 | 1611 | ||
| 1564 | #ifdef CONFIG_X86_64 | 1612 | #ifdef CONFIG_X86_64 |
| @@ -1570,7 +1618,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
| 1570 | } | 1618 | } |
| 1571 | #endif | 1619 | #endif |
| 1572 | 1620 | ||
| 1573 | if (vm_need_ept()) | 1621 | if (enable_ept) |
| 1574 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); | 1622 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); |
| 1575 | 1623 | ||
| 1576 | vmcs_writel(CR0_READ_SHADOW, cr0); | 1624 | vmcs_writel(CR0_READ_SHADOW, cr0); |
| @@ -1599,7 +1647,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
| 1599 | u64 eptp; | 1647 | u64 eptp; |
| 1600 | 1648 | ||
| 1601 | guest_cr3 = cr3; | 1649 | guest_cr3 = cr3; |
| 1602 | if (vm_need_ept()) { | 1650 | if (enable_ept) { |
| 1603 | eptp = construct_eptp(cr3); | 1651 | eptp = construct_eptp(cr3); |
| 1604 | vmcs_write64(EPT_POINTER, eptp); | 1652 | vmcs_write64(EPT_POINTER, eptp); |
| 1605 | ept_sync_context(eptp); | 1653 | ept_sync_context(eptp); |
| @@ -1616,11 +1664,11 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
| 1616 | 1664 | ||
| 1617 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 1665 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
| 1618 | { | 1666 | { |
| 1619 | unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ? | 1667 | unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.vm86_active ? |
| 1620 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); | 1668 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); |
| 1621 | 1669 | ||
| 1622 | vcpu->arch.cr4 = cr4; | 1670 | vcpu->arch.cr4 = cr4; |
| 1623 | if (vm_need_ept()) | 1671 | if (enable_ept) |
| 1624 | ept_update_paging_mode_cr4(&hw_cr4, vcpu); | 1672 | ept_update_paging_mode_cr4(&hw_cr4, vcpu); |
| 1625 | 1673 | ||
| 1626 | vmcs_writel(CR4_READ_SHADOW, cr4); | 1674 | vmcs_writel(CR4_READ_SHADOW, cr4); |
| @@ -1699,7 +1747,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
| 1699 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 1747 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
| 1700 | u32 ar; | 1748 | u32 ar; |
| 1701 | 1749 | ||
| 1702 | if (vcpu->arch.rmode.active && seg == VCPU_SREG_TR) { | 1750 | if (vcpu->arch.rmode.vm86_active && seg == VCPU_SREG_TR) { |
| 1703 | vcpu->arch.rmode.tr.selector = var->selector; | 1751 | vcpu->arch.rmode.tr.selector = var->selector; |
| 1704 | vcpu->arch.rmode.tr.base = var->base; | 1752 | vcpu->arch.rmode.tr.base = var->base; |
| 1705 | vcpu->arch.rmode.tr.limit = var->limit; | 1753 | vcpu->arch.rmode.tr.limit = var->limit; |
| @@ -1709,7 +1757,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
| 1709 | vmcs_writel(sf->base, var->base); | 1757 | vmcs_writel(sf->base, var->base); |
| 1710 | vmcs_write32(sf->limit, var->limit); | 1758 | vmcs_write32(sf->limit, var->limit); |
| 1711 | vmcs_write16(sf->selector, var->selector); | 1759 | vmcs_write16(sf->selector, var->selector); |
| 1712 | if (vcpu->arch.rmode.active && var->s) { | 1760 | if (vcpu->arch.rmode.vm86_active && var->s) { |
| 1713 | /* | 1761 | /* |
| 1714 | * Hack real-mode segments into vm86 compatibility. | 1762 | * Hack real-mode segments into vm86 compatibility. |
| 1715 | */ | 1763 | */ |
| @@ -1982,7 +2030,7 @@ static int init_rmode_identity_map(struct kvm *kvm) | |||
| 1982 | pfn_t identity_map_pfn; | 2030 | pfn_t identity_map_pfn; |
| 1983 | u32 tmp; | 2031 | u32 tmp; |
| 1984 | 2032 | ||
| 1985 | if (!vm_need_ept()) | 2033 | if (!enable_ept) |
| 1986 | return 1; | 2034 | return 1; |
| 1987 | if (unlikely(!kvm->arch.ept_identity_pagetable)) { | 2035 | if (unlikely(!kvm->arch.ept_identity_pagetable)) { |
| 1988 | printk(KERN_ERR "EPT: identity-mapping pagetable " | 2036 | printk(KERN_ERR "EPT: identity-mapping pagetable " |
| @@ -2071,7 +2119,7 @@ static void allocate_vpid(struct vcpu_vmx *vmx) | |||
| 2071 | int vpid; | 2119 | int vpid; |
| 2072 | 2120 | ||
| 2073 | vmx->vpid = 0; | 2121 | vmx->vpid = 0; |
| 2074 | if (!enable_vpid || !cpu_has_vmx_vpid()) | 2122 | if (!enable_vpid) |
| 2075 | return; | 2123 | return; |
| 2076 | spin_lock(&vmx_vpid_lock); | 2124 | spin_lock(&vmx_vpid_lock); |
| 2077 | vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); | 2125 | vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); |
| @@ -2082,9 +2130,9 @@ static void allocate_vpid(struct vcpu_vmx *vmx) | |||
| 2082 | spin_unlock(&vmx_vpid_lock); | 2130 | spin_unlock(&vmx_vpid_lock); |
| 2083 | } | 2131 | } |
| 2084 | 2132 | ||
| 2085 | static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) | 2133 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) |
| 2086 | { | 2134 | { |
| 2087 | void *va; | 2135 | int f = sizeof(unsigned long); |
| 2088 | 2136 | ||
| 2089 | if (!cpu_has_vmx_msr_bitmap()) | 2137 | if (!cpu_has_vmx_msr_bitmap()) |
| 2090 | return; | 2138 | return; |
| @@ -2094,16 +2142,21 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) | |||
| 2094 | * have the write-low and read-high bitmap offsets the wrong way round. | 2142 | * have the write-low and read-high bitmap offsets the wrong way round. |
| 2095 | * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. | 2143 | * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. |
| 2096 | */ | 2144 | */ |
| 2097 | va = kmap(msr_bitmap); | ||
| 2098 | if (msr <= 0x1fff) { | 2145 | if (msr <= 0x1fff) { |
| 2099 | __clear_bit(msr, va + 0x000); /* read-low */ | 2146 | __clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */ |
| 2100 | __clear_bit(msr, va + 0x800); /* write-low */ | 2147 | __clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */ |
| 2101 | } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { | 2148 | } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { |
| 2102 | msr &= 0x1fff; | 2149 | msr &= 0x1fff; |
| 2103 | __clear_bit(msr, va + 0x400); /* read-high */ | 2150 | __clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */ |
| 2104 | __clear_bit(msr, va + 0xc00); /* write-high */ | 2151 | __clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */ |
| 2105 | } | 2152 | } |
| 2106 | kunmap(msr_bitmap); | 2153 | } |
| 2154 | |||
| 2155 | static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) | ||
| 2156 | { | ||
| 2157 | if (!longmode_only) | ||
| 2158 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr); | ||
| 2159 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr); | ||
| 2107 | } | 2160 | } |
| 2108 | 2161 | ||
| 2109 | /* | 2162 | /* |
| @@ -2121,11 +2174,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 2121 | u32 exec_control; | 2174 | u32 exec_control; |
| 2122 | 2175 | ||
| 2123 | /* I/O */ | 2176 | /* I/O */ |
| 2124 | vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a)); | 2177 | vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a)); |
| 2125 | vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b)); | 2178 | vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b)); |
| 2126 | 2179 | ||
| 2127 | if (cpu_has_vmx_msr_bitmap()) | 2180 | if (cpu_has_vmx_msr_bitmap()) |
| 2128 | vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap)); | 2181 | vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); |
| 2129 | 2182 | ||
| 2130 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ | 2183 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ |
| 2131 | 2184 | ||
| @@ -2141,7 +2194,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 2141 | CPU_BASED_CR8_LOAD_EXITING; | 2194 | CPU_BASED_CR8_LOAD_EXITING; |
| 2142 | #endif | 2195 | #endif |
| 2143 | } | 2196 | } |
| 2144 | if (!vm_need_ept()) | 2197 | if (!enable_ept) |
| 2145 | exec_control |= CPU_BASED_CR3_STORE_EXITING | | 2198 | exec_control |= CPU_BASED_CR3_STORE_EXITING | |
| 2146 | CPU_BASED_CR3_LOAD_EXITING | | 2199 | CPU_BASED_CR3_LOAD_EXITING | |
| 2147 | CPU_BASED_INVLPG_EXITING; | 2200 | CPU_BASED_INVLPG_EXITING; |
| @@ -2154,7 +2207,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 2154 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 2207 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
| 2155 | if (vmx->vpid == 0) | 2208 | if (vmx->vpid == 0) |
| 2156 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; | 2209 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; |
| 2157 | if (!vm_need_ept()) | 2210 | if (!enable_ept) |
| 2158 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; | 2211 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; |
| 2159 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 2212 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
| 2160 | } | 2213 | } |
| @@ -2273,7 +2326,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 2273 | goto out; | 2326 | goto out; |
| 2274 | } | 2327 | } |
| 2275 | 2328 | ||
| 2276 | vmx->vcpu.arch.rmode.active = 0; | 2329 | vmx->vcpu.arch.rmode.vm86_active = 0; |
| 2277 | 2330 | ||
| 2278 | vmx->soft_vnmi_blocked = 0; | 2331 | vmx->soft_vnmi_blocked = 0; |
| 2279 | 2332 | ||
| @@ -2402,14 +2455,16 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
| 2402 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 2455 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
| 2403 | } | 2456 | } |
| 2404 | 2457 | ||
| 2405 | static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) | 2458 | static void vmx_inject_irq(struct kvm_vcpu *vcpu) |
| 2406 | { | 2459 | { |
| 2407 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2460 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 2461 | uint32_t intr; | ||
| 2462 | int irq = vcpu->arch.interrupt.nr; | ||
| 2408 | 2463 | ||
| 2409 | KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); | 2464 | KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); |
| 2410 | 2465 | ||
| 2411 | ++vcpu->stat.irq_injections; | 2466 | ++vcpu->stat.irq_injections; |
| 2412 | if (vcpu->arch.rmode.active) { | 2467 | if (vcpu->arch.rmode.vm86_active) { |
| 2413 | vmx->rmode.irq.pending = true; | 2468 | vmx->rmode.irq.pending = true; |
| 2414 | vmx->rmode.irq.vector = irq; | 2469 | vmx->rmode.irq.vector = irq; |
| 2415 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); | 2470 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); |
| @@ -2419,8 +2474,14 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) | |||
| 2419 | kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); | 2474 | kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); |
| 2420 | return; | 2475 | return; |
| 2421 | } | 2476 | } |
| 2422 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 2477 | intr = irq | INTR_INFO_VALID_MASK; |
| 2423 | irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); | 2478 | if (vcpu->arch.interrupt.soft) { |
| 2479 | intr |= INTR_TYPE_SOFT_INTR; | ||
| 2480 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, | ||
| 2481 | vmx->vcpu.arch.event_exit_inst_len); | ||
| 2482 | } else | ||
| 2483 | intr |= INTR_TYPE_EXT_INTR; | ||
| 2484 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); | ||
| 2424 | } | 2485 | } |
| 2425 | 2486 | ||
| 2426 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | 2487 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) |
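vmx_inject_irq() now pulls the vector from the queued interrupt itself and distinguishes soft interrupts (INT n, which also need VM_ENTRY_INSTRUCTION_LEN so the injected event pushes a return address past the instruction) from external ones. A hedged sketch of how the 32-bit VM-entry interruption-information word is assembled — vector in bits 7:0, type in bits 10:8, valid in bit 31 per the SDM; the constant names are illustrative:

#include <stdint.h>

#define INTR_INFO_VECTOR_MASK     0xffu       /* bits 7:0                    */
#define INTR_TYPE_EXT_INTR        (0u << 8)   /* external interrupt          */
#define INTR_TYPE_NMI_INTR        (2u << 8)   /* NMI                         */
#define INTR_TYPE_HARD_EXCEPTION  (3u << 8)   /* hardware exception          */
#define INTR_TYPE_SOFT_INTR       (4u << 8)   /* software interrupt (INT n)  */
#define INTR_INFO_VALID_MASK      (1u << 31)  /* valid bit                   */

/* Build the interruption-information word for injecting vector 'irq'. */
static uint32_t build_intr_info(uint8_t irq, int soft)
{
	uint32_t intr = irq | INTR_INFO_VALID_MASK;

	/* Soft interrupts additionally require VM_ENTRY_INSTRUCTION_LEN to be
	 * written with the length of the emulated INT instruction (not shown). */
	intr |= soft ? INTR_TYPE_SOFT_INTR : INTR_TYPE_EXT_INTR;
	return intr;
}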
| @@ -2441,7 +2502,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
| 2441 | } | 2502 | } |
| 2442 | 2503 | ||
| 2443 | ++vcpu->stat.nmi_injections; | 2504 | ++vcpu->stat.nmi_injections; |
| 2444 | if (vcpu->arch.rmode.active) { | 2505 | if (vcpu->arch.rmode.vm86_active) { |
| 2445 | vmx->rmode.irq.pending = true; | 2506 | vmx->rmode.irq.pending = true; |
| 2446 | vmx->rmode.irq.vector = NMI_VECTOR; | 2507 | vmx->rmode.irq.vector = NMI_VECTOR; |
| 2447 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); | 2508 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); |
| @@ -2456,76 +2517,21 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
| 2456 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); | 2517 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); |
| 2457 | } | 2518 | } |
| 2458 | 2519 | ||
| 2459 | static void vmx_update_window_states(struct kvm_vcpu *vcpu) | 2520 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) |
| 2460 | { | 2521 | { |
| 2461 | u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | ||
| 2462 | |||
| 2463 | vcpu->arch.nmi_window_open = | ||
| 2464 | !(guest_intr & (GUEST_INTR_STATE_STI | | ||
| 2465 | GUEST_INTR_STATE_MOV_SS | | ||
| 2466 | GUEST_INTR_STATE_NMI)); | ||
| 2467 | if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) | 2522 | if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) |
| 2468 | vcpu->arch.nmi_window_open = 0; | 2523 | return 0; |
| 2469 | |||
| 2470 | vcpu->arch.interrupt_window_open = | ||
| 2471 | ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && | ||
| 2472 | !(guest_intr & (GUEST_INTR_STATE_STI | | ||
| 2473 | GUEST_INTR_STATE_MOV_SS))); | ||
| 2474 | } | ||
| 2475 | |||
| 2476 | static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) | ||
| 2477 | { | ||
| 2478 | int word_index = __ffs(vcpu->arch.irq_summary); | ||
| 2479 | int bit_index = __ffs(vcpu->arch.irq_pending[word_index]); | ||
| 2480 | int irq = word_index * BITS_PER_LONG + bit_index; | ||
| 2481 | 2524 | ||
| 2482 | clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]); | 2525 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & |
| 2483 | if (!vcpu->arch.irq_pending[word_index]) | 2526 | (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS | |
| 2484 | clear_bit(word_index, &vcpu->arch.irq_summary); | 2527 | GUEST_INTR_STATE_NMI)); |
| 2485 | kvm_queue_interrupt(vcpu, irq); | ||
| 2486 | } | 2528 | } |
| 2487 | 2529 | ||
| 2488 | static void do_interrupt_requests(struct kvm_vcpu *vcpu, | 2530 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) |
| 2489 | struct kvm_run *kvm_run) | ||
| 2490 | { | 2531 | { |
| 2491 | vmx_update_window_states(vcpu); | 2532 | return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && |
| 2492 | 2533 | !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | |
| 2493 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 2534 | (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); |
| 2494 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
| 2495 | GUEST_INTR_STATE_STI | | ||
| 2496 | GUEST_INTR_STATE_MOV_SS); | ||
| 2497 | |||
| 2498 | if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { | ||
| 2499 | if (vcpu->arch.interrupt.pending) { | ||
| 2500 | enable_nmi_window(vcpu); | ||
| 2501 | } else if (vcpu->arch.nmi_window_open) { | ||
| 2502 | vcpu->arch.nmi_pending = false; | ||
| 2503 | vcpu->arch.nmi_injected = true; | ||
| 2504 | } else { | ||
| 2505 | enable_nmi_window(vcpu); | ||
| 2506 | return; | ||
| 2507 | } | ||
| 2508 | } | ||
| 2509 | if (vcpu->arch.nmi_injected) { | ||
| 2510 | vmx_inject_nmi(vcpu); | ||
| 2511 | if (vcpu->arch.nmi_pending) | ||
| 2512 | enable_nmi_window(vcpu); | ||
| 2513 | else if (vcpu->arch.irq_summary | ||
| 2514 | || kvm_run->request_interrupt_window) | ||
| 2515 | enable_irq_window(vcpu); | ||
| 2516 | return; | ||
| 2517 | } | ||
| 2518 | |||
| 2519 | if (vcpu->arch.interrupt_window_open) { | ||
| 2520 | if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) | ||
| 2521 | kvm_do_inject_irq(vcpu); | ||
| 2522 | |||
| 2523 | if (vcpu->arch.interrupt.pending) | ||
| 2524 | vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); | ||
| 2525 | } | ||
| 2526 | if (!vcpu->arch.interrupt_window_open && | ||
| 2527 | (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) | ||
| 2528 | enable_irq_window(vcpu); | ||
| 2529 | } | 2535 | } |
| 2530 | 2536 | ||
| 2531 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | 2537 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) |
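The open-coded window tracking (vmx_update_window_states, kvm_do_inject_irq, do_interrupt_requests) is replaced above by two stateless predicates the common code can call: an interrupt is deliverable only when RFLAGS.IF is set and the guest is not in an STI or MOV-SS interrupt shadow; an NMI additionally requires that no NMI is in progress (and, without virtual NMIs, that the soft-blocked flag is clear). A plain C sketch of the two predicates over mocked register values — illustrative constants, not kernel API:

#include <stdbool.h>
#include <stdint.h>

#define X86_EFLAGS_IF            (1u << 9)
/* Guest interruptibility-state bits (VMCS GUEST_INTERRUPTIBILITY_INFO). */
#define GUEST_INTR_STATE_STI     (1u << 0)   /* blocking by STI            */
#define GUEST_INTR_STATE_MOV_SS  (1u << 1)   /* blocking by MOV SS/POP SS  */
#define GUEST_INTR_STATE_NMI     (1u << 3)   /* blocking by NMI            */

static bool interrupt_allowed(uint64_t rflags, uint32_t intr_state)
{
	return (rflags & X86_EFLAGS_IF) &&
	       !(intr_state & (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
}

static bool nmi_allowed(uint32_t intr_state, bool soft_vnmi_blocked)
{
	if (soft_vnmi_blocked)      /* only relevant without virtual-NMI support */
		return false;
	return !(intr_state & (GUEST_INTR_STATE_STI |
			       GUEST_INTR_STATE_MOV_SS |
			       GUEST_INTR_STATE_NMI));
}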
| @@ -2585,6 +2591,31 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
| 2585 | return 0; | 2591 | return 0; |
| 2586 | } | 2592 | } |
| 2587 | 2593 | ||
| 2594 | /* | ||
| 2595 | * Trigger machine check on the host. We assume all the MSRs are already set up | ||
| 2596 | * by the CPU and that we still run on the same CPU as the MCE occurred on. | ||
| 2597 | * We pass a fake environment to the machine check handler because we want | ||
| 2598 | * the guest to be always treated like user space, no matter what context | ||
| 2599 | * it used internally. | ||
| 2600 | */ | ||
| 2601 | static void kvm_machine_check(void) | ||
| 2602 | { | ||
| 2603 | #if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) | ||
| 2604 | struct pt_regs regs = { | ||
| 2605 | .cs = 3, /* Fake ring 3 no matter what the guest ran on */ | ||
| 2606 | .flags = X86_EFLAGS_IF, | ||
| 2607 | }; | ||
| 2608 | |||
| 2609 | do_machine_check(®s, 0); | ||
| 2610 | #endif | ||
| 2611 | } | ||
| 2612 | |||
| 2613 | static int handle_machine_check(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
| 2614 | { | ||
| 2615 | /* already handled by vcpu_run */ | ||
| 2616 | return 1; | ||
| 2617 | } | ||
| 2618 | |||
| 2588 | static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2619 | static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 2589 | { | 2620 | { |
| 2590 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2621 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| @@ -2596,17 +2627,14 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2596 | vect_info = vmx->idt_vectoring_info; | 2627 | vect_info = vmx->idt_vectoring_info; |
| 2597 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 2628 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
| 2598 | 2629 | ||
| 2630 | if (is_machine_check(intr_info)) | ||
| 2631 | return handle_machine_check(vcpu, kvm_run); | ||
| 2632 | |||
| 2599 | if ((vect_info & VECTORING_INFO_VALID_MASK) && | 2633 | if ((vect_info & VECTORING_INFO_VALID_MASK) && |
| 2600 | !is_page_fault(intr_info)) | 2634 | !is_page_fault(intr_info)) |
| 2601 | printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " | 2635 | printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " |
| 2602 | "intr info 0x%x\n", __func__, vect_info, intr_info); | 2636 | "intr info 0x%x\n", __func__, vect_info, intr_info); |
| 2603 | 2637 | ||
| 2604 | if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) { | ||
| 2605 | int irq = vect_info & VECTORING_INFO_VECTOR_MASK; | ||
| 2606 | set_bit(irq, vcpu->arch.irq_pending); | ||
| 2607 | set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); | ||
| 2608 | } | ||
| 2609 | |||
| 2610 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) | 2638 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) |
| 2611 | return 1; /* already handled by vmx_vcpu_run() */ | 2639 | return 1; /* already handled by vmx_vcpu_run() */ |
| 2612 | 2640 | ||
| @@ -2628,17 +2656,17 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2628 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); | 2656 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); |
| 2629 | if (is_page_fault(intr_info)) { | 2657 | if (is_page_fault(intr_info)) { |
| 2630 | /* EPT won't cause page fault directly */ | 2658 | /* EPT won't cause page fault directly */ |
| 2631 | if (vm_need_ept()) | 2659 | if (enable_ept) |
| 2632 | BUG(); | 2660 | BUG(); |
| 2633 | cr2 = vmcs_readl(EXIT_QUALIFICATION); | 2661 | cr2 = vmcs_readl(EXIT_QUALIFICATION); |
| 2634 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, | 2662 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, |
| 2635 | (u32)((u64)cr2 >> 32), handler); | 2663 | (u32)((u64)cr2 >> 32), handler); |
| 2636 | if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending) | 2664 | if (kvm_event_needs_reinjection(vcpu)) |
| 2637 | kvm_mmu_unprotect_page_virt(vcpu, cr2); | 2665 | kvm_mmu_unprotect_page_virt(vcpu, cr2); |
| 2638 | return kvm_mmu_page_fault(vcpu, cr2, error_code); | 2666 | return kvm_mmu_page_fault(vcpu, cr2, error_code); |
| 2639 | } | 2667 | } |
| 2640 | 2668 | ||
| 2641 | if (vcpu->arch.rmode.active && | 2669 | if (vcpu->arch.rmode.vm86_active && |
| 2642 | handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, | 2670 | handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, |
| 2643 | error_code)) { | 2671 | error_code)) { |
| 2644 | if (vcpu->arch.halt_request) { | 2672 | if (vcpu->arch.halt_request) { |
| @@ -2753,13 +2781,18 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2753 | kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg)); | 2781 | kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg)); |
| 2754 | skip_emulated_instruction(vcpu); | 2782 | skip_emulated_instruction(vcpu); |
| 2755 | return 1; | 2783 | return 1; |
| 2756 | case 8: | 2784 | case 8: { |
| 2757 | kvm_set_cr8(vcpu, kvm_register_read(vcpu, reg)); | 2785 | u8 cr8_prev = kvm_get_cr8(vcpu); |
| 2758 | skip_emulated_instruction(vcpu); | 2786 | u8 cr8 = kvm_register_read(vcpu, reg); |
| 2759 | if (irqchip_in_kernel(vcpu->kvm)) | 2787 | kvm_set_cr8(vcpu, cr8); |
| 2760 | return 1; | 2788 | skip_emulated_instruction(vcpu); |
| 2761 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; | 2789 | if (irqchip_in_kernel(vcpu->kvm)) |
| 2762 | return 0; | 2790 | return 1; |
| 2791 | if (cr8_prev <= cr8) | ||
| 2792 | return 1; | ||
| 2793 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; | ||
| 2794 | return 0; | ||
| 2795 | } | ||
| 2763 | }; | 2796 | }; |
| 2764 | break; | 2797 | break; |
| 2765 | case 2: /* clts */ | 2798 | case 2: /* clts */ |
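With a user-space irqchip, a guest write to CR8 (the TPR) only needs to reach user space when it could unmask a pending interrupt, i.e. when the new TPR is lower than the old one; raising the TPR can be absorbed without an exit, which is what the new cr8_prev <= cr8 early return expresses. A small sketch of that decision (hypothetical helper; return values mirror the handler, 1 = resume guest, 0 = exit to user space):

/* Decide whether a CR8 (TPR) write must be reported to user space.
 * Only a TPR drop can make a previously masked interrupt deliverable. */
static int handle_cr8_write(unsigned char cr8_prev, unsigned char cr8_new,
			    int irqchip_in_kernel)
{
	if (irqchip_in_kernel)
		return 1;       /* in-kernel APIC handles TPR itself            */
	if (cr8_prev <= cr8_new)
		return 1;       /* TPR raised or unchanged: nothing unmasked    */
	return 0;               /* TPR lowered: KVM_EXIT_SET_TPR to user space  */
}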
| @@ -2957,8 +2990,9 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, | |||
| 2957 | * If the user space waits to inject interrupts, exit as soon as | 2990 | * If the user space waits to inject interrupts, exit as soon as |
| 2958 | * possible | 2991 | * possible |
| 2959 | */ | 2992 | */ |
| 2960 | if (kvm_run->request_interrupt_window && | 2993 | if (!irqchip_in_kernel(vcpu->kvm) && |
| 2961 | !vcpu->arch.irq_summary) { | 2994 | kvm_run->request_interrupt_window && |
| 2995 | !kvm_cpu_has_interrupt(vcpu)) { | ||
| 2962 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | 2996 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; |
| 2963 | return 0; | 2997 | return 0; |
| 2964 | } | 2998 | } |
| @@ -2980,7 +3014,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2980 | 3014 | ||
| 2981 | static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3015 | static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 2982 | { | 3016 | { |
| 2983 | u64 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); | 3017 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
| 2984 | 3018 | ||
| 2985 | kvm_mmu_invlpg(vcpu, exit_qualification); | 3019 | kvm_mmu_invlpg(vcpu, exit_qualification); |
| 2986 | skip_emulated_instruction(vcpu); | 3020 | skip_emulated_instruction(vcpu); |
| @@ -2996,11 +3030,11 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2996 | 3030 | ||
| 2997 | static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3031 | static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 2998 | { | 3032 | { |
| 2999 | u64 exit_qualification; | 3033 | unsigned long exit_qualification; |
| 3000 | enum emulation_result er; | 3034 | enum emulation_result er; |
| 3001 | unsigned long offset; | 3035 | unsigned long offset; |
| 3002 | 3036 | ||
| 3003 | exit_qualification = vmcs_read64(EXIT_QUALIFICATION); | 3037 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
| 3004 | offset = exit_qualification & 0xffful; | 3038 | offset = exit_qualification & 0xffful; |
| 3005 | 3039 | ||
| 3006 | er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | 3040 | er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); |
| @@ -3019,22 +3053,41 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3019 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3053 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 3020 | unsigned long exit_qualification; | 3054 | unsigned long exit_qualification; |
| 3021 | u16 tss_selector; | 3055 | u16 tss_selector; |
| 3022 | int reason; | 3056 | int reason, type, idt_v; |
| 3057 | |||
| 3058 | idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); | ||
| 3059 | type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); | ||
| 3023 | 3060 | ||
| 3024 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3061 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
| 3025 | 3062 | ||
| 3026 | reason = (u32)exit_qualification >> 30; | 3063 | reason = (u32)exit_qualification >> 30; |
| 3027 | if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected && | 3064 | if (reason == TASK_SWITCH_GATE && idt_v) { |
| 3028 | (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && | 3065 | switch (type) { |
| 3029 | (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK) | 3066 | case INTR_TYPE_NMI_INTR: |
| 3030 | == INTR_TYPE_NMI_INTR) { | 3067 | vcpu->arch.nmi_injected = false; |
| 3031 | vcpu->arch.nmi_injected = false; | 3068 | if (cpu_has_virtual_nmis()) |
| 3032 | if (cpu_has_virtual_nmis()) | 3069 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, |
| 3033 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | 3070 | GUEST_INTR_STATE_NMI); |
| 3034 | GUEST_INTR_STATE_NMI); | 3071 | break; |
| 3072 | case INTR_TYPE_EXT_INTR: | ||
| 3073 | case INTR_TYPE_SOFT_INTR: | ||
| 3074 | kvm_clear_interrupt_queue(vcpu); | ||
| 3075 | break; | ||
| 3076 | case INTR_TYPE_HARD_EXCEPTION: | ||
| 3077 | case INTR_TYPE_SOFT_EXCEPTION: | ||
| 3078 | kvm_clear_exception_queue(vcpu); | ||
| 3079 | break; | ||
| 3080 | default: | ||
| 3081 | break; | ||
| 3082 | } | ||
| 3035 | } | 3083 | } |
| 3036 | tss_selector = exit_qualification; | 3084 | tss_selector = exit_qualification; |
| 3037 | 3085 | ||
| 3086 | if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION && | ||
| 3087 | type != INTR_TYPE_EXT_INTR && | ||
| 3088 | type != INTR_TYPE_NMI_INTR)) | ||
| 3089 | skip_emulated_instruction(vcpu); | ||
| 3090 | |||
| 3038 | if (!kvm_task_switch(vcpu, tss_selector, reason)) | 3091 | if (!kvm_task_switch(vcpu, tss_selector, reason)) |
| 3039 | return 0; | 3092 | return 0; |
| 3040 | 3093 | ||
| @@ -3051,11 +3104,11 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3051 | 3104 | ||
| 3052 | static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3105 | static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 3053 | { | 3106 | { |
| 3054 | u64 exit_qualification; | 3107 | unsigned long exit_qualification; |
| 3055 | gpa_t gpa; | 3108 | gpa_t gpa; |
| 3056 | int gla_validity; | 3109 | int gla_validity; |
| 3057 | 3110 | ||
| 3058 | exit_qualification = vmcs_read64(EXIT_QUALIFICATION); | 3111 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
| 3059 | 3112 | ||
| 3060 | if (exit_qualification & (1 << 6)) { | 3113 | if (exit_qualification & (1 << 6)) { |
| 3061 | printk(KERN_ERR "EPT: GPA exceeds GAW!\n"); | 3114 | printk(KERN_ERR "EPT: GPA exceeds GAW!\n"); |
| @@ -3067,7 +3120,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3067 | printk(KERN_ERR "EPT: Handling EPT violation failed!\n"); | 3120 | printk(KERN_ERR "EPT: Handling EPT violation failed!\n"); |
| 3068 | printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", | 3121 | printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", |
| 3069 | (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), | 3122 | (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), |
| 3070 | (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS)); | 3123 | vmcs_readl(GUEST_LINEAR_ADDRESS)); |
| 3071 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", | 3124 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", |
| 3072 | (long unsigned int)exit_qualification); | 3125 | (long unsigned int)exit_qualification); |
| 3073 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3126 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; |
| @@ -3150,6 +3203,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
| 3150 | [EXIT_REASON_WBINVD] = handle_wbinvd, | 3203 | [EXIT_REASON_WBINVD] = handle_wbinvd, |
| 3151 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, | 3204 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, |
| 3152 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, | 3205 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, |
| 3206 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, | ||
| 3153 | }; | 3207 | }; |
| 3154 | 3208 | ||
| 3155 | static const int kvm_vmx_max_exit_handlers = | 3209 | static const int kvm_vmx_max_exit_handlers = |
| @@ -3159,10 +3213,10 @@ static const int kvm_vmx_max_exit_handlers = | |||
| 3159 | * The guest has exited. See if we can fix it or if we need userspace | 3213 | * The guest has exited. See if we can fix it or if we need userspace |
| 3160 | * assistance. | 3214 | * assistance. |
| 3161 | */ | 3215 | */ |
| 3162 | static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | 3216 | static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) |
| 3163 | { | 3217 | { |
| 3164 | u32 exit_reason = vmcs_read32(VM_EXIT_REASON); | ||
| 3165 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3218 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 3219 | u32 exit_reason = vmx->exit_reason; | ||
| 3166 | u32 vectoring_info = vmx->idt_vectoring_info; | 3220 | u32 vectoring_info = vmx->idt_vectoring_info; |
| 3167 | 3221 | ||
| 3168 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), | 3222 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), |
| @@ -3178,7 +3232,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 3178 | 3232 | ||
| 3179 | /* Accessing CR3 doesn't cause a VM exit in paging mode, so we need | 3233 | /* Accessing CR3 doesn't cause a VM exit in paging mode, so we need |
| 3180 | * to sync with the guest's real CR3. */ | 3234 | * to sync with the guest's real CR3. */ |
| 3181 | if (vm_need_ept() && is_paging(vcpu)) { | 3235 | if (enable_ept && is_paging(vcpu)) { |
| 3182 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | 3236 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); |
| 3183 | ept_load_pdptrs(vcpu); | 3237 | ept_load_pdptrs(vcpu); |
| 3184 | } | 3238 | } |
| @@ -3199,9 +3253,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 3199 | __func__, vectoring_info, exit_reason); | 3253 | __func__, vectoring_info, exit_reason); |
| 3200 | 3254 | ||
| 3201 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) { | 3255 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) { |
| 3202 | if (vcpu->arch.interrupt_window_open) { | 3256 | if (vmx_interrupt_allowed(vcpu)) { |
| 3203 | vmx->soft_vnmi_blocked = 0; | 3257 | vmx->soft_vnmi_blocked = 0; |
| 3204 | vcpu->arch.nmi_window_open = 1; | ||
| 3205 | } else if (vmx->vnmi_blocked_time > 1000000000LL && | 3258 | } else if (vmx->vnmi_blocked_time > 1000000000LL && |
| 3206 | vcpu->arch.nmi_pending) { | 3259 | vcpu->arch.nmi_pending) { |
| 3207 | /* | 3260 | /* |
| @@ -3214,7 +3267,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 3214 | "state on VCPU %d after 1 s timeout\n", | 3267 | "state on VCPU %d after 1 s timeout\n", |
| 3215 | __func__, vcpu->vcpu_id); | 3268 | __func__, vcpu->vcpu_id); |
| 3216 | vmx->soft_vnmi_blocked = 0; | 3269 | vmx->soft_vnmi_blocked = 0; |
| 3217 | vmx->vcpu.arch.nmi_window_open = 1; | ||
| 3218 | } | 3270 | } |
| 3219 | } | 3271 | } |
| 3220 | 3272 | ||
| @@ -3228,122 +3280,107 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 3228 | return 0; | 3280 | return 0; |
| 3229 | } | 3281 | } |
| 3230 | 3282 | ||
| 3231 | static void update_tpr_threshold(struct kvm_vcpu *vcpu) | 3283 | static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) |
| 3232 | { | 3284 | { |
| 3233 | int max_irr, tpr; | 3285 | if (irr == -1 || tpr < irr) { |
| 3234 | |||
| 3235 | if (!vm_need_tpr_shadow(vcpu->kvm)) | ||
| 3236 | return; | ||
| 3237 | |||
| 3238 | if (!kvm_lapic_enabled(vcpu) || | ||
| 3239 | ((max_irr = kvm_lapic_find_highest_irr(vcpu)) == -1)) { | ||
| 3240 | vmcs_write32(TPR_THRESHOLD, 0); | 3286 | vmcs_write32(TPR_THRESHOLD, 0); |
| 3241 | return; | 3287 | return; |
| 3242 | } | 3288 | } |
| 3243 | 3289 | ||
| 3244 | tpr = (kvm_lapic_get_cr8(vcpu) & 0x0f) << 4; | 3290 | vmcs_write32(TPR_THRESHOLD, irr); |
| 3245 | vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4); | ||
| 3246 | } | 3291 | } |
| 3247 | 3292 | ||
| 3248 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | 3293 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) |
| 3249 | { | 3294 | { |
| 3250 | u32 exit_intr_info; | 3295 | u32 exit_intr_info; |
| 3251 | u32 idt_vectoring_info; | 3296 | u32 idt_vectoring_info = vmx->idt_vectoring_info; |
| 3252 | bool unblock_nmi; | 3297 | bool unblock_nmi; |
| 3253 | u8 vector; | 3298 | u8 vector; |
| 3254 | int type; | 3299 | int type; |
| 3255 | bool idtv_info_valid; | 3300 | bool idtv_info_valid; |
| 3256 | u32 error; | ||
| 3257 | 3301 | ||
| 3258 | exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 3302 | exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
| 3303 | |||
| 3304 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); | ||
| 3305 | |||
| 3306 | /* Handle machine checks before interrupts are enabled */ | ||
| 3307 | if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) | ||
| 3308 | || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI | ||
| 3309 | && is_machine_check(exit_intr_info))) | ||
| 3310 | kvm_machine_check(); | ||
| 3311 | |||
| 3312 | /* We need to handle NMIs before interrupts are enabled */ | ||
| 3313 | if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && | ||
| 3314 | (exit_intr_info & INTR_INFO_VALID_MASK)) { | ||
| 3315 | KVMTRACE_0D(NMI, &vmx->vcpu, handler); | ||
| 3316 | asm("int $2"); | ||
| 3317 | } | ||
| 3318 | |||
| 3319 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | ||
| 3320 | |||
| 3259 | if (cpu_has_virtual_nmis()) { | 3321 | if (cpu_has_virtual_nmis()) { |
| 3260 | unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; | 3322 | unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; |
| 3261 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; | 3323 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; |
| 3262 | /* | 3324 | /* |
| 3263 | * SDM 3: 25.7.1.2 | 3325 | * SDM 3: 27.7.1.2 (September 2008) |
| 3264 | * Re-set bit "block by NMI" before VM entry if vmexit caused by | 3326 | * Re-set bit "block by NMI" before VM entry if vmexit caused by |
| 3265 | * a guest IRET fault. | 3327 | * a guest IRET fault. |
| 3328 | * SDM 3: 23.2.2 (September 2008) | ||
| 3329 | * Bit 12 is undefined in any of the following cases: | ||
| 3330 | * If the VM exit sets the valid bit in the IDT-vectoring | ||
| 3331 | * information field. | ||
| 3332 | * If the VM exit is due to a double fault. | ||
| 3266 | */ | 3333 | */ |
| 3267 | if (unblock_nmi && vector != DF_VECTOR) | 3334 | if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && |
| 3335 | vector != DF_VECTOR && !idtv_info_valid) | ||
| 3268 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | 3336 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, |
| 3269 | GUEST_INTR_STATE_NMI); | 3337 | GUEST_INTR_STATE_NMI); |
| 3270 | } else if (unlikely(vmx->soft_vnmi_blocked)) | 3338 | } else if (unlikely(vmx->soft_vnmi_blocked)) |
| 3271 | vmx->vnmi_blocked_time += | 3339 | vmx->vnmi_blocked_time += |
| 3272 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); | 3340 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); |
| 3273 | 3341 | ||
| 3274 | idt_vectoring_info = vmx->idt_vectoring_info; | 3342 | vmx->vcpu.arch.nmi_injected = false; |
| 3275 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | 3343 | kvm_clear_exception_queue(&vmx->vcpu); |
| 3344 | kvm_clear_interrupt_queue(&vmx->vcpu); | ||
| 3345 | |||
| 3346 | if (!idtv_info_valid) | ||
| 3347 | return; | ||
| 3348 | |||
| 3276 | vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; | 3349 | vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; |
| 3277 | type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; | 3350 | type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; |
| 3278 | if (vmx->vcpu.arch.nmi_injected) { | 3351 | |
| 3352 | switch (type) { | ||
| 3353 | case INTR_TYPE_NMI_INTR: | ||
| 3354 | vmx->vcpu.arch.nmi_injected = true; | ||
| 3279 | /* | 3355 | /* |
| 3280 | * SDM 3: 25.7.1.2 | 3356 | * SDM 3: 27.7.1.2 (September 2008) |
| 3281 | * Clear bit "block by NMI" before VM entry if a NMI delivery | 3357 | * Clear bit "block by NMI" before VM entry if a NMI |
| 3282 | * faulted. | 3358 | * delivery faulted. |
| 3283 | */ | 3359 | */ |
| 3284 | if (idtv_info_valid && type == INTR_TYPE_NMI_INTR) | 3360 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, |
| 3285 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, | 3361 | GUEST_INTR_STATE_NMI); |
| 3286 | GUEST_INTR_STATE_NMI); | 3362 | break; |
| 3287 | else | 3363 | case INTR_TYPE_SOFT_EXCEPTION: |
| 3288 | vmx->vcpu.arch.nmi_injected = false; | 3364 | vmx->vcpu.arch.event_exit_inst_len = |
| 3289 | } | 3365 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); |
| 3290 | kvm_clear_exception_queue(&vmx->vcpu); | 3366 | /* fall through */ |
| 3291 | if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION || | 3367 | case INTR_TYPE_HARD_EXCEPTION: |
| 3292 | type == INTR_TYPE_SOFT_EXCEPTION)) { | ||
| 3293 | if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { | 3368 | if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { |
| 3294 | error = vmcs_read32(IDT_VECTORING_ERROR_CODE); | 3369 | u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE); |
| 3295 | kvm_queue_exception_e(&vmx->vcpu, vector, error); | 3370 | kvm_queue_exception_e(&vmx->vcpu, vector, err); |
| 3296 | } else | 3371 | } else |
| 3297 | kvm_queue_exception(&vmx->vcpu, vector); | 3372 | kvm_queue_exception(&vmx->vcpu, vector); |
| 3298 | vmx->idt_vectoring_info = 0; | 3373 | break; |
| 3299 | } | 3374 | case INTR_TYPE_SOFT_INTR: |
| 3300 | kvm_clear_interrupt_queue(&vmx->vcpu); | 3375 | vmx->vcpu.arch.event_exit_inst_len = |
| 3301 | if (idtv_info_valid && type == INTR_TYPE_EXT_INTR) { | 3376 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); |
| 3302 | kvm_queue_interrupt(&vmx->vcpu, vector); | 3377 | /* fall through */ |
| 3303 | vmx->idt_vectoring_info = 0; | 3378 | case INTR_TYPE_EXT_INTR: |
| 3304 | } | 3379 | kvm_queue_interrupt(&vmx->vcpu, vector, |
| 3305 | } | 3380 | type == INTR_TYPE_SOFT_INTR); |
| 3306 | 3381 | break; | |
| 3307 | static void vmx_intr_assist(struct kvm_vcpu *vcpu) | 3382 | default: |
| 3308 | { | 3383 | break; |
| 3309 | update_tpr_threshold(vcpu); | ||
| 3310 | |||
| 3311 | vmx_update_window_states(vcpu); | ||
| 3312 | |||
| 3313 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
| 3314 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
| 3315 | GUEST_INTR_STATE_STI | | ||
| 3316 | GUEST_INTR_STATE_MOV_SS); | ||
| 3317 | |||
| 3318 | if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { | ||
| 3319 | if (vcpu->arch.interrupt.pending) { | ||
| 3320 | enable_nmi_window(vcpu); | ||
| 3321 | } else if (vcpu->arch.nmi_window_open) { | ||
| 3322 | vcpu->arch.nmi_pending = false; | ||
| 3323 | vcpu->arch.nmi_injected = true; | ||
| 3324 | } else { | ||
| 3325 | enable_nmi_window(vcpu); | ||
| 3326 | return; | ||
| 3327 | } | ||
| 3328 | } | ||
| 3329 | if (vcpu->arch.nmi_injected) { | ||
| 3330 | vmx_inject_nmi(vcpu); | ||
| 3331 | if (vcpu->arch.nmi_pending) | ||
| 3332 | enable_nmi_window(vcpu); | ||
| 3333 | else if (kvm_cpu_has_interrupt(vcpu)) | ||
| 3334 | enable_irq_window(vcpu); | ||
| 3335 | return; | ||
| 3336 | } | ||
| 3337 | if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { | ||
| 3338 | if (vcpu->arch.interrupt_window_open) | ||
| 3339 | kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); | ||
| 3340 | else | ||
| 3341 | enable_irq_window(vcpu); | ||
| 3342 | } | ||
| 3343 | if (vcpu->arch.interrupt.pending) { | ||
| 3344 | vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); | ||
| 3345 | if (kvm_cpu_has_interrupt(vcpu)) | ||
| 3346 | enable_irq_window(vcpu); | ||
| 3347 | } | 3384 | } |
| 3348 | } | 3385 | } |
| 3349 | 3386 | ||
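In the reworked vmx_complete_interrupts() above, the NMI/exception/interrupt queues are cleared unconditionally after every exit, and whatever event the IDT-vectoring information field says was being delivered when the exit interrupted it is then requeued, keyed on the event type. A hedged sketch of the field decode — vector in bits 7:0, type in bits 10:8, error-code-valid in bit 11, valid in bit 31 per the SDM; the struct and names are illustrative:

#include <stdbool.h>
#include <stdint.h>

#define VECTORING_INFO_VECTOR_MASK        0xffu
#define VECTORING_INFO_TYPE_MASK          (7u << 8)
#define VECTORING_INFO_DELIVER_CODE_MASK  (1u << 11)
#define VECTORING_INFO_VALID_MASK         (1u << 31)

struct vectoring_event {
	bool    valid;           /* an event was being delivered at VM exit            */
	uint8_t vector;          /* which vector                                       */
	uint8_t type;            /* 0=ext intr, 2=NMI, 3=hw exc, 4=soft intr, 6=soft exc */
	bool    has_error_code;  /* the exception pushed an error code                 */
};

static struct vectoring_event decode_idt_vectoring(uint32_t info)
{
	struct vectoring_event ev = {
		.valid          = info & VECTORING_INFO_VALID_MASK,
		.vector         = info & VECTORING_INFO_VECTOR_MASK,
		.type           = (info & VECTORING_INFO_TYPE_MASK) >> 8,
		.has_error_code = info & VECTORING_INFO_DELIVER_CODE_MASK,
	};
	return ev;
}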
| @@ -3381,7 +3418,6 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx) | |||
| 3381 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3418 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 3382 | { | 3419 | { |
| 3383 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3420 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 3384 | u32 intr_info; | ||
| 3385 | 3421 | ||
| 3386 | /* Record the guest's net vcpu time for enforced NMI injections. */ | 3422 | /* Record the guest's net vcpu time for enforced NMI injections. */ |
| 3387 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) | 3423 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) |
| @@ -3505,20 +3541,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3505 | if (vmx->rmode.irq.pending) | 3541 | if (vmx->rmode.irq.pending) |
| 3506 | fixup_rmode_irq(vmx); | 3542 | fixup_rmode_irq(vmx); |
| 3507 | 3543 | ||
| 3508 | vmx_update_window_states(vcpu); | ||
| 3509 | |||
| 3510 | asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); | 3544 | asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); |
| 3511 | vmx->launched = 1; | 3545 | vmx->launched = 1; |
| 3512 | 3546 | ||
| 3513 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | ||
| 3514 | |||
| 3515 | /* We need to handle NMIs before interrupts are enabled */ | ||
| 3516 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && | ||
| 3517 | (intr_info & INTR_INFO_VALID_MASK)) { | ||
| 3518 | KVMTRACE_0D(NMI, vcpu, handler); | ||
| 3519 | asm("int $2"); | ||
| 3520 | } | ||
| 3521 | |||
| 3522 | vmx_complete_interrupts(vmx); | 3547 | vmx_complete_interrupts(vmx); |
| 3523 | } | 3548 | } |
| 3524 | 3549 | ||
| @@ -3593,7 +3618,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
| 3593 | if (alloc_apic_access_page(kvm) != 0) | 3618 | if (alloc_apic_access_page(kvm) != 0) |
| 3594 | goto free_vmcs; | 3619 | goto free_vmcs; |
| 3595 | 3620 | ||
| 3596 | if (vm_need_ept()) | 3621 | if (enable_ept) |
| 3597 | if (alloc_identity_pagetable(kvm) != 0) | 3622 | if (alloc_identity_pagetable(kvm) != 0) |
| 3598 | goto free_vmcs; | 3623 | goto free_vmcs; |
| 3599 | 3624 | ||
| @@ -3631,9 +3656,32 @@ static int get_ept_level(void) | |||
| 3631 | return VMX_EPT_DEFAULT_GAW + 1; | 3656 | return VMX_EPT_DEFAULT_GAW + 1; |
| 3632 | } | 3657 | } |
| 3633 | 3658 | ||
| 3634 | static int vmx_get_mt_mask_shift(void) | 3659 | static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) |
| 3635 | { | 3660 | { |
| 3636 | return VMX_EPT_MT_EPTE_SHIFT; | 3661 | u64 ret; |
| 3662 | |||
| 3663 | /* For VT-d and EPT combination | ||
| 3664 | * 1. MMIO: always map as UC | ||
| 3665 | * 2. EPT with VT-d: | ||
| 3666 | * a. VT-d without snooping control feature: can't guarantee the | ||
| 3667 | * result, try to trust guest. | ||
| 3668 | * b. VT-d with snooping control feature: snooping control feature of | ||
| 3669 | * VT-d engine can guarantee the cache correctness. Just set it | ||
| 3670 | * to WB to keep consistent with host. So the same as item 3. | ||
| 3671 | * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep | ||
| 3672 | * consistent with host MTRR | ||
| 3673 | */ | ||
| 3674 | if (is_mmio) | ||
| 3675 | ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; | ||
| 3676 | else if (vcpu->kvm->arch.iommu_domain && | ||
| 3677 | !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)) | ||
| 3678 | ret = kvm_get_guest_memory_type(vcpu, gfn) << | ||
| 3679 | VMX_EPT_MT_EPTE_SHIFT; | ||
| 3680 | else | ||
| 3681 | ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | ||
| 3682 | | VMX_EPT_IGMT_BIT; | ||
| 3683 | |||
| 3684 | return ret; | ||
| 3637 | } | 3685 | } |
| 3638 | 3686 | ||
| 3639 | static struct kvm_x86_ops vmx_x86_ops = { | 3687 | static struct kvm_x86_ops vmx_x86_ops = { |
| @@ -3644,7 +3692,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
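The new vmx_get_mt_mask() above replaces the fixed memory-type shift with the full decision spelled out in its comment: MMIO is always uncached, a device-assignment domain without IOMMU snooping control has to trust the guest's own memory type, and everything else is write-back with the "ignore guest PAT" bit set so it stays consistent with the host. A condensed sketch of the same decision tree — names are illustrative, and guest_mtrr_type stands in for the kvm_get_guest_memory_type() lookup the real code uses:

#include <stdbool.h>
#include <stdint.h>

#define MTRR_TYPE_UNCACHABLE  0
#define MTRR_TYPE_WRBACK      6
#define EPT_MT_SHIFT          3             /* memory type lives in EPT PTE bits 5:3 */
#define EPT_IGNORE_GUEST_PAT  (1ull << 6)   /* IGMT/IPAT bit                         */

/* Pick the EPT memory-type bits for one guest frame. */
static uint64_t ept_memtype_bits(bool is_mmio, bool assigned_device,
				 bool iommu_snoops, int guest_mtrr_type)
{
	if (is_mmio)                            /* device memory: never cache          */
		return (uint64_t)MTRR_TYPE_UNCACHABLE << EPT_MT_SHIFT;

	if (assigned_device && !iommu_snoops)   /* VT-d can't keep caches coherent:    */
		return (uint64_t)guest_mtrr_type << EPT_MT_SHIFT;   /* trust the guest */

	/* No assigned device, or a snooping IOMMU: write-back, ignore guest PAT. */
	return ((uint64_t)MTRR_TYPE_WRBACK << EPT_MT_SHIFT) | EPT_IGNORE_GUEST_PAT;
}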
| 3644 | .check_processor_compatibility = vmx_check_processor_compat, | 3692 | .check_processor_compatibility = vmx_check_processor_compat, |
| 3645 | .hardware_enable = hardware_enable, | 3693 | .hardware_enable = hardware_enable, |
| 3646 | .hardware_disable = hardware_disable, | 3694 | .hardware_disable = hardware_disable, |
| 3647 | .cpu_has_accelerated_tpr = cpu_has_vmx_virtualize_apic_accesses, | 3695 | .cpu_has_accelerated_tpr = report_flexpriority, |
| 3648 | 3696 | ||
| 3649 | .vcpu_create = vmx_create_vcpu, | 3697 | .vcpu_create = vmx_create_vcpu, |
| 3650 | .vcpu_free = vmx_free_vcpu, | 3698 | .vcpu_free = vmx_free_vcpu, |
| @@ -3678,78 +3726,82 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
| 3678 | .tlb_flush = vmx_flush_tlb, | 3726 | .tlb_flush = vmx_flush_tlb, |
| 3679 | 3727 | ||
| 3680 | .run = vmx_vcpu_run, | 3728 | .run = vmx_vcpu_run, |
| 3681 | .handle_exit = kvm_handle_exit, | 3729 | .handle_exit = vmx_handle_exit, |
| 3682 | .skip_emulated_instruction = skip_emulated_instruction, | 3730 | .skip_emulated_instruction = skip_emulated_instruction, |
| 3731 | .set_interrupt_shadow = vmx_set_interrupt_shadow, | ||
| 3732 | .get_interrupt_shadow = vmx_get_interrupt_shadow, | ||
| 3683 | .patch_hypercall = vmx_patch_hypercall, | 3733 | .patch_hypercall = vmx_patch_hypercall, |
| 3684 | .get_irq = vmx_get_irq, | ||
| 3685 | .set_irq = vmx_inject_irq, | 3734 | .set_irq = vmx_inject_irq, |
| 3735 | .set_nmi = vmx_inject_nmi, | ||
| 3686 | .queue_exception = vmx_queue_exception, | 3736 | .queue_exception = vmx_queue_exception, |
| 3687 | .exception_injected = vmx_exception_injected, | 3737 | .interrupt_allowed = vmx_interrupt_allowed, |
| 3688 | .inject_pending_irq = vmx_intr_assist, | 3738 | .nmi_allowed = vmx_nmi_allowed, |
| 3689 | .inject_pending_vectors = do_interrupt_requests, | 3739 | .enable_nmi_window = enable_nmi_window, |
| 3740 | .enable_irq_window = enable_irq_window, | ||
| 3741 | .update_cr8_intercept = update_cr8_intercept, | ||
| 3690 | 3742 | ||
| 3691 | .set_tss_addr = vmx_set_tss_addr, | 3743 | .set_tss_addr = vmx_set_tss_addr, |
| 3692 | .get_tdp_level = get_ept_level, | 3744 | .get_tdp_level = get_ept_level, |
| 3693 | .get_mt_mask_shift = vmx_get_mt_mask_shift, | 3745 | .get_mt_mask = vmx_get_mt_mask, |
| 3694 | }; | 3746 | }; |
| 3695 | 3747 | ||
| 3696 | static int __init vmx_init(void) | 3748 | static int __init vmx_init(void) |
| 3697 | { | 3749 | { |
| 3698 | void *va; | ||
| 3699 | int r; | 3750 | int r; |
| 3700 | 3751 | ||
| 3701 | vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); | 3752 | vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); |
| 3702 | if (!vmx_io_bitmap_a) | 3753 | if (!vmx_io_bitmap_a) |
| 3703 | return -ENOMEM; | 3754 | return -ENOMEM; |
| 3704 | 3755 | ||
| 3705 | vmx_io_bitmap_b = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); | 3756 | vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL); |
| 3706 | if (!vmx_io_bitmap_b) { | 3757 | if (!vmx_io_bitmap_b) { |
| 3707 | r = -ENOMEM; | 3758 | r = -ENOMEM; |
| 3708 | goto out; | 3759 | goto out; |
| 3709 | } | 3760 | } |
| 3710 | 3761 | ||
| 3711 | vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); | 3762 | vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); |
| 3712 | if (!vmx_msr_bitmap) { | 3763 | if (!vmx_msr_bitmap_legacy) { |
| 3713 | r = -ENOMEM; | 3764 | r = -ENOMEM; |
| 3714 | goto out1; | 3765 | goto out1; |
| 3715 | } | 3766 | } |
| 3716 | 3767 | ||
| 3768 | vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); | ||
| 3769 | if (!vmx_msr_bitmap_longmode) { | ||
| 3770 | r = -ENOMEM; | ||
| 3771 | goto out2; | ||
| 3772 | } | ||
| 3773 | |||
| 3717 | /* | 3774 | /* |
| 3718 | * Allow direct access to the PC debug port (it is often used for I/O | 3775 | * Allow direct access to the PC debug port (it is often used for I/O |
| 3719 | * delays, but the vmexits simply slow things down). | 3776 | * delays, but the vmexits simply slow things down). |
| 3720 | */ | 3777 | */ |
| 3721 | va = kmap(vmx_io_bitmap_a); | 3778 | memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); |
| 3722 | memset(va, 0xff, PAGE_SIZE); | 3779 | clear_bit(0x80, vmx_io_bitmap_a); |
| 3723 | clear_bit(0x80, va); | ||
| 3724 | kunmap(vmx_io_bitmap_a); | ||
| 3725 | 3780 | ||
| 3726 | va = kmap(vmx_io_bitmap_b); | 3781 | memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); |
| 3727 | memset(va, 0xff, PAGE_SIZE); | ||
| 3728 | kunmap(vmx_io_bitmap_b); | ||
| 3729 | 3782 | ||
| 3730 | va = kmap(vmx_msr_bitmap); | 3783 | memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); |
| 3731 | memset(va, 0xff, PAGE_SIZE); | 3784 | memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); |
| 3732 | kunmap(vmx_msr_bitmap); | ||
| 3733 | 3785 | ||
| 3734 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ | 3786 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ |
| 3735 | 3787 | ||
| 3736 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); | 3788 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); |
| 3737 | if (r) | 3789 | if (r) |
| 3738 | goto out2; | 3790 | goto out3; |
| 3739 | 3791 | ||
| 3740 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_FS_BASE); | 3792 | vmx_disable_intercept_for_msr(MSR_FS_BASE, false); |
| 3741 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_GS_BASE); | 3793 | vmx_disable_intercept_for_msr(MSR_GS_BASE, false); |
| 3742 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_CS); | 3794 | vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); |
| 3743 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); | 3795 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); |
| 3744 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); | 3796 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); |
| 3797 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); | ||
| 3745 | 3798 | ||
| 3746 | if (vm_need_ept()) { | 3799 | if (enable_ept) { |
| 3747 | bypass_guest_pf = 0; | 3800 | bypass_guest_pf = 0; |
| 3748 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | | 3801 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | |
| 3749 | VMX_EPT_WRITABLE_MASK); | 3802 | VMX_EPT_WRITABLE_MASK); |
| 3750 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, | 3803 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, |
| 3751 | VMX_EPT_EXECUTABLE_MASK, | 3804 | VMX_EPT_EXECUTABLE_MASK); |
| 3752 | VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); | ||
| 3753 | kvm_enable_tdp(); | 3805 | kvm_enable_tdp(); |
| 3754 | } else | 3806 | } else |
| 3755 | kvm_disable_tdp(); | 3807 | kvm_disable_tdp(); |
| @@ -3761,20 +3813,23 @@ static int __init vmx_init(void) | |||
| 3761 | 3813 | ||
| 3762 | return 0; | 3814 | return 0; |
| 3763 | 3815 | ||
| 3816 | out3: | ||
| 3817 | free_page((unsigned long)vmx_msr_bitmap_longmode); | ||
| 3764 | out2: | 3818 | out2: |
| 3765 | __free_page(vmx_msr_bitmap); | 3819 | free_page((unsigned long)vmx_msr_bitmap_legacy); |
| 3766 | out1: | 3820 | out1: |
| 3767 | __free_page(vmx_io_bitmap_b); | 3821 | free_page((unsigned long)vmx_io_bitmap_b); |
| 3768 | out: | 3822 | out: |
| 3769 | __free_page(vmx_io_bitmap_a); | 3823 | free_page((unsigned long)vmx_io_bitmap_a); |
| 3770 | return r; | 3824 | return r; |
| 3771 | } | 3825 | } |
| 3772 | 3826 | ||
| 3773 | static void __exit vmx_exit(void) | 3827 | static void __exit vmx_exit(void) |
| 3774 | { | 3828 | { |
| 3775 | __free_page(vmx_msr_bitmap); | 3829 | free_page((unsigned long)vmx_msr_bitmap_legacy); |
| 3776 | __free_page(vmx_io_bitmap_b); | 3830 | free_page((unsigned long)vmx_msr_bitmap_longmode); |
| 3777 | __free_page(vmx_io_bitmap_a); | 3831 | free_page((unsigned long)vmx_io_bitmap_b); |
| 3832 | free_page((unsigned long)vmx_io_bitmap_a); | ||
| 3778 | 3833 | ||
| 3779 | kvm_exit(); | 3834 | kvm_exit(); |
| 3780 | } | 3835 | } |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3944e917e794..249540f98513 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -91,7 +91,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
| 91 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, | 91 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, |
| 92 | { "hypercalls", VCPU_STAT(hypercalls) }, | 92 | { "hypercalls", VCPU_STAT(hypercalls) }, |
| 93 | { "request_irq", VCPU_STAT(request_irq_exits) }, | 93 | { "request_irq", VCPU_STAT(request_irq_exits) }, |
| 94 | { "request_nmi", VCPU_STAT(request_nmi_exits) }, | ||
| 95 | { "irq_exits", VCPU_STAT(irq_exits) }, | 94 | { "irq_exits", VCPU_STAT(irq_exits) }, |
| 96 | { "host_state_reload", VCPU_STAT(host_state_reload) }, | 95 | { "host_state_reload", VCPU_STAT(host_state_reload) }, |
| 97 | { "efer_reload", VCPU_STAT(efer_reload) }, | 96 | { "efer_reload", VCPU_STAT(efer_reload) }, |
| @@ -108,7 +107,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
| 108 | { "mmu_recycled", VM_STAT(mmu_recycled) }, | 107 | { "mmu_recycled", VM_STAT(mmu_recycled) }, |
| 109 | { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, | 108 | { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, |
| 110 | { "mmu_unsync", VM_STAT(mmu_unsync) }, | 109 | { "mmu_unsync", VM_STAT(mmu_unsync) }, |
| 111 | { "mmu_unsync_global", VM_STAT(mmu_unsync_global) }, | ||
| 112 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, | 110 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, |
| 113 | { "largepages", VM_STAT(lpages) }, | 111 | { "largepages", VM_STAT(lpages) }, |
| 114 | { NULL } | 112 | { NULL } |
| @@ -234,7 +232,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
| 234 | goto out; | 232 | goto out; |
| 235 | } | 233 | } |
| 236 | for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { | 234 | for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { |
| 237 | if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) { | 235 | if (is_present_pte(pdpte[i]) && |
| 236 | (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) { | ||
| 238 | ret = 0; | 237 | ret = 0; |
| 239 | goto out; | 238 | goto out; |
| 240 | } | 239 | } |
| @@ -321,7 +320,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
| 321 | kvm_x86_ops->set_cr0(vcpu, cr0); | 320 | kvm_x86_ops->set_cr0(vcpu, cr0); |
| 322 | vcpu->arch.cr0 = cr0; | 321 | vcpu->arch.cr0 = cr0; |
| 323 | 322 | ||
| 324 | kvm_mmu_sync_global(vcpu); | ||
| 325 | kvm_mmu_reset_context(vcpu); | 323 | kvm_mmu_reset_context(vcpu); |
| 326 | return; | 324 | return; |
| 327 | } | 325 | } |
| @@ -370,7 +368,6 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
| 370 | kvm_x86_ops->set_cr4(vcpu, cr4); | 368 | kvm_x86_ops->set_cr4(vcpu, cr4); |
| 371 | vcpu->arch.cr4 = cr4; | 369 | vcpu->arch.cr4 = cr4; |
| 372 | vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; | 370 | vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; |
| 373 | kvm_mmu_sync_global(vcpu); | ||
| 374 | kvm_mmu_reset_context(vcpu); | 371 | kvm_mmu_reset_context(vcpu); |
| 375 | } | 372 | } |
| 376 | EXPORT_SYMBOL_GPL(kvm_set_cr4); | 373 | EXPORT_SYMBOL_GPL(kvm_set_cr4); |
| @@ -523,6 +520,9 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
| 523 | efer |= vcpu->arch.shadow_efer & EFER_LMA; | 520 | efer |= vcpu->arch.shadow_efer & EFER_LMA; |
| 524 | 521 | ||
| 525 | vcpu->arch.shadow_efer = efer; | 522 | vcpu->arch.shadow_efer = efer; |
| 523 | |||
| 524 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | ||
| 525 | kvm_mmu_reset_context(vcpu); | ||
| 526 | } | 526 | } |
| 527 | 527 | ||
| 528 | void kvm_enable_efer_bits(u64 mask) | 528 | void kvm_enable_efer_bits(u64 mask) |
| @@ -630,14 +630,17 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
| 630 | unsigned long flags; | 630 | unsigned long flags; |
| 631 | struct kvm_vcpu_arch *vcpu = &v->arch; | 631 | struct kvm_vcpu_arch *vcpu = &v->arch; |
| 632 | void *shared_kaddr; | 632 | void *shared_kaddr; |
| 633 | unsigned long this_tsc_khz; | ||
| 633 | 634 | ||
| 634 | if ((!vcpu->time_page)) | 635 | if ((!vcpu->time_page)) |
| 635 | return; | 636 | return; |
| 636 | 637 | ||
| 637 | if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) { | 638 | this_tsc_khz = get_cpu_var(cpu_tsc_khz); |
| 638 | kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock); | 639 | if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) { |
| 639 | vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz); | 640 | kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock); |
| 641 | vcpu->hv_clock_tsc_khz = this_tsc_khz; | ||
| 640 | } | 642 | } |
| 643 | put_cpu_var(cpu_tsc_khz); | ||
| 641 | 644 | ||
| 642 | /* Keep irq disabled to prevent changes to the clock */ | 645 | /* Keep irq disabled to prevent changes to the clock */ |
| 643 | local_irq_save(flags); | 646 | local_irq_save(flags); |
| @@ -893,6 +896,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
| 893 | case MSR_IA32_LASTINTFROMIP: | 896 | case MSR_IA32_LASTINTFROMIP: |
| 894 | case MSR_IA32_LASTINTTOIP: | 897 | case MSR_IA32_LASTINTTOIP: |
| 895 | case MSR_VM_HSAVE_PA: | 898 | case MSR_VM_HSAVE_PA: |
| 899 | case MSR_P6_EVNTSEL0: | ||
| 900 | case MSR_P6_EVNTSEL1: | ||
| 896 | data = 0; | 901 | data = 0; |
| 897 | break; | 902 | break; |
| 898 | case MSR_MTRRcap: | 903 | case MSR_MTRRcap: |
| @@ -1024,6 +1029,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 1024 | case KVM_CAP_SYNC_MMU: | 1029 | case KVM_CAP_SYNC_MMU: |
| 1025 | case KVM_CAP_REINJECT_CONTROL: | 1030 | case KVM_CAP_REINJECT_CONTROL: |
| 1026 | case KVM_CAP_IRQ_INJECT_STATUS: | 1031 | case KVM_CAP_IRQ_INJECT_STATUS: |
| 1032 | case KVM_CAP_ASSIGN_DEV_IRQ: | ||
| 1027 | r = 1; | 1033 | r = 1; |
| 1028 | break; | 1034 | break; |
| 1029 | case KVM_CAP_COALESCED_MMIO: | 1035 | case KVM_CAP_COALESCED_MMIO: |
| @@ -1241,41 +1247,53 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 1241 | entry->flags = 0; | 1247 | entry->flags = 0; |
| 1242 | } | 1248 | } |
| 1243 | 1249 | ||
| 1250 | #define F(x) bit(X86_FEATURE_##x) | ||
| 1251 | |||
| 1244 | static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | 1252 | static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
| 1245 | u32 index, int *nent, int maxnent) | 1253 | u32 index, int *nent, int maxnent) |
| 1246 | { | 1254 | { |
| 1247 | const u32 kvm_supported_word0_x86_features = bit(X86_FEATURE_FPU) | | 1255 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; |
| 1248 | bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) | | ||
| 1249 | bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) | | ||
| 1250 | bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) | | ||
| 1251 | bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) | | ||
| 1252 | bit(X86_FEATURE_SEP) | bit(X86_FEATURE_PGE) | | ||
| 1253 | bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) | | ||
| 1254 | bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) | | ||
| 1255 | bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) | | ||
| 1256 | bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP); | ||
| 1257 | const u32 kvm_supported_word1_x86_features = bit(X86_FEATURE_FPU) | | ||
| 1258 | bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) | | ||
| 1259 | bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) | | ||
| 1260 | bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) | | ||
| 1261 | bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) | | ||
| 1262 | bit(X86_FEATURE_PGE) | | ||
| 1263 | bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) | | ||
| 1264 | bit(X86_FEATURE_MMX) | bit(X86_FEATURE_FXSR) | | ||
| 1265 | bit(X86_FEATURE_SYSCALL) | | ||
| 1266 | (is_efer_nx() ? bit(X86_FEATURE_NX) : 0) | | ||
| 1267 | #ifdef CONFIG_X86_64 | 1256 | #ifdef CONFIG_X86_64 |
| 1268 | bit(X86_FEATURE_LM) | | 1257 | unsigned f_lm = F(LM); |
| 1258 | #else | ||
| 1259 | unsigned f_lm = 0; | ||
| 1269 | #endif | 1260 | #endif |
| 1270 | bit(X86_FEATURE_FXSR_OPT) | | 1261 | |
| 1271 | bit(X86_FEATURE_MMXEXT) | | 1262 | /* cpuid 1.edx */ |
| 1272 | bit(X86_FEATURE_3DNOWEXT) | | 1263 | const u32 kvm_supported_word0_x86_features = |
| 1273 | bit(X86_FEATURE_3DNOW); | 1264 | F(FPU) | F(VME) | F(DE) | F(PSE) | |
| 1274 | const u32 kvm_supported_word3_x86_features = | 1265 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | |
| 1275 | bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16); | 1266 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | |
| 1267 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | ||
| 1268 | F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) | | ||
| 1269 | 0 /* Reserved, DS, ACPI */ | F(MMX) | | ||
| 1270 | F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | | ||
| 1271 | 0 /* HTT, TM, Reserved, PBE */; | ||
| 1272 | /* cpuid 0x80000001.edx */ | ||
| 1273 | const u32 kvm_supported_word1_x86_features = | ||
| 1274 | F(FPU) | F(VME) | F(DE) | F(PSE) | | ||
| 1275 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | | ||
| 1276 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | | ||
| 1277 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | ||
| 1278 | F(PAT) | F(PSE36) | 0 /* Reserved */ | | ||
| 1279 | f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | | ||
| 1280 | F(FXSR) | F(FXSR_OPT) | 0 /* GBPAGES */ | 0 /* RDTSCP */ | | ||
| 1281 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); | ||
| 1282 | /* cpuid 1.ecx */ | ||
| 1283 | const u32 kvm_supported_word4_x86_features = | ||
| 1284 | F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ | | ||
| 1285 | 0 /* DS-CPL, VMX, SMX, EST */ | | ||
| 1286 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | | ||
| 1287 | 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | | ||
| 1288 | 0 /* Reserved, DCA */ | F(XMM4_1) | | ||
| 1289 | F(XMM4_2) | 0 /* x2APIC */ | F(MOVBE) | F(POPCNT) | | ||
| 1290 | 0 /* Reserved, XSAVE, OSXSAVE */; | ||
| 1291 | /* cpuid 0x80000001.ecx */ | ||
| 1276 | const u32 kvm_supported_word6_x86_features = | 1292 | const u32 kvm_supported_word6_x86_features = |
| 1277 | bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) | | 1293 | F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | |
| 1278 | bit(X86_FEATURE_SVM); | 1294 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | |
| 1295 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) | | ||
| 1296 | 0 /* SKINIT */ | 0 /* WDT */; | ||
| 1279 | 1297 | ||
| 1280 | /* all calls to cpuid_count() should be made on the same cpu */ | 1298 | /* all calls to cpuid_count() should be made on the same cpu */ |
| 1281 | get_cpu(); | 1299 | get_cpu(); |
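The long bit(X86_FEATURE_...) chains become table-like feature words built with a local F(x) token-pasting macro, with explicit 0 /* ... */ placeholders so every CPUID bit position is accounted for in leaf order. A minimal standalone illustration of the same pattern — the feature-bit numbers here are made up for the example and are not the real X86_FEATURE_* values:

#include <stdint.h>
#include <stdio.h>

/* Pretend feature-bit numbers, in cpuid-leaf bit order (illustrative only). */
#define DEMO_FEATURE_FPU   0
#define DEMO_FEATURE_VME   1
#define DEMO_FEATURE_DE    2
#define DEMO_FEATURE_PSE   3

#define bit(x)  (1u << (x))
#define F(x)    bit(DEMO_FEATURE_##x)   /* token pasting keeps the table readable */

/* Bits we would expose to the guest for this (made-up) leaf; the zero keeps
 * the hidden DE position visible in the table, as the kernel change does. */
static const uint32_t supported_features =
	F(FPU) | F(VME) | 0 /* DE deliberately hidden */ | F(PSE);

int main(void)
{
	uint32_t host_edx = 0x0f;               /* pretend host CPUID output */
	printf("guest edx = %#x\n", host_edx & supported_features);
	return 0;
}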
| @@ -1288,7 +1306,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 1288 | break; | 1306 | break; |
| 1289 | case 1: | 1307 | case 1: |
| 1290 | entry->edx &= kvm_supported_word0_x86_features; | 1308 | entry->edx &= kvm_supported_word0_x86_features; |
| 1291 | entry->ecx &= kvm_supported_word3_x86_features; | 1309 | entry->ecx &= kvm_supported_word4_x86_features; |
| 1292 | break; | 1310 | break; |
| 1293 | /* function 2 entries are STATEFUL. That is, repeated cpuid commands | 1311 | /* function 2 entries are STATEFUL. That is, repeated cpuid commands |
| 1294 | * may return different values. This forces us to get_cpu() before | 1312 | * may return different values. This forces us to get_cpu() before |
| @@ -1350,6 +1368,8 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 1350 | put_cpu(); | 1368 | put_cpu(); |
| 1351 | } | 1369 | } |
| 1352 | 1370 | ||
| 1371 | #undef F | ||
| 1372 | |||
| 1353 | static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | 1373 | static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, |
| 1354 | struct kvm_cpuid_entry2 __user *entries) | 1374 | struct kvm_cpuid_entry2 __user *entries) |
| 1355 | { | 1375 | { |
| @@ -1421,8 +1441,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, | |||
| 1421 | return -ENXIO; | 1441 | return -ENXIO; |
| 1422 | vcpu_load(vcpu); | 1442 | vcpu_load(vcpu); |
| 1423 | 1443 | ||
| 1424 | set_bit(irq->irq, vcpu->arch.irq_pending); | 1444 | kvm_queue_interrupt(vcpu, irq->irq, false); |
| 1425 | set_bit(irq->irq / BITS_PER_LONG, &vcpu->arch.irq_summary); | ||
| 1426 | 1445 | ||
| 1427 | vcpu_put(vcpu); | 1446 | vcpu_put(vcpu); |
| 1428 | 1447 | ||
| @@ -1584,8 +1603,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 1584 | r = -EINVAL; | 1603 | r = -EINVAL; |
| 1585 | } | 1604 | } |
| 1586 | out: | 1605 | out: |
| 1587 | if (lapic) | 1606 | kfree(lapic); |
| 1588 | kfree(lapic); | ||
| 1589 | return r; | 1607 | return r; |
| 1590 | } | 1608 | } |
| 1591 | 1609 | ||
| @@ -1606,10 +1624,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, | |||
| 1606 | return -EINVAL; | 1624 | return -EINVAL; |
| 1607 | 1625 | ||
| 1608 | down_write(&kvm->slots_lock); | 1626 | down_write(&kvm->slots_lock); |
| 1627 | spin_lock(&kvm->mmu_lock); | ||
| 1609 | 1628 | ||
| 1610 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); | 1629 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); |
| 1611 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; | 1630 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; |
| 1612 | 1631 | ||
| 1632 | spin_unlock(&kvm->mmu_lock); | ||
| 1613 | up_write(&kvm->slots_lock); | 1633 | up_write(&kvm->slots_lock); |
| 1614 | return 0; | 1634 | return 0; |
| 1615 | } | 1635 | } |
| @@ -1785,7 +1805,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
| 1785 | 1805 | ||
| 1786 | /* If nothing is dirty, don't bother messing with page tables. */ | 1806 | /* If nothing is dirty, don't bother messing with page tables. */ |
| 1787 | if (is_dirty) { | 1807 | if (is_dirty) { |
| 1808 | spin_lock(&kvm->mmu_lock); | ||
| 1788 | kvm_mmu_slot_remove_write_access(kvm, log->slot); | 1809 | kvm_mmu_slot_remove_write_access(kvm, log->slot); |
| 1810 | spin_unlock(&kvm->mmu_lock); | ||
| 1789 | kvm_flush_remote_tlbs(kvm); | 1811 | kvm_flush_remote_tlbs(kvm); |
| 1790 | memslot = &kvm->memslots[log->slot]; | 1812 | memslot = &kvm->memslots[log->slot]; |
| 1791 | n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; | 1813 | n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; |
| @@ -2360,7 +2382,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 2360 | u16 error_code, | 2382 | u16 error_code, |
| 2361 | int emulation_type) | 2383 | int emulation_type) |
| 2362 | { | 2384 | { |
| 2363 | int r; | 2385 | int r, shadow_mask; |
| 2364 | struct decode_cache *c; | 2386 | struct decode_cache *c; |
| 2365 | 2387 | ||
| 2366 | kvm_clear_exception_queue(vcpu); | 2388 | kvm_clear_exception_queue(vcpu); |
| @@ -2408,7 +2430,16 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 2408 | } | 2430 | } |
| 2409 | } | 2431 | } |
| 2410 | 2432 | ||
| 2433 | if (emulation_type & EMULTYPE_SKIP) { | ||
| 2434 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip); | ||
| 2435 | return EMULATE_DONE; | ||
| 2436 | } | ||
| 2437 | |||
| 2411 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 2438 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
| 2439 | shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; | ||
| 2440 | |||
| 2441 | if (r == 0) | ||
| 2442 | kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); | ||
| 2412 | 2443 | ||
| 2413 | if (vcpu->arch.pio.string) | 2444 | if (vcpu->arch.pio.string) |
| 2414 | return EMULATE_DO_MMIO; | 2445 | return EMULATE_DO_MMIO; |
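The EMULTYPE_SKIP branch added above lets a caller run the decoder purely to advance RIP past the current instruction without emulating it. A minimal sketch of such a caller, under the assumption that EMULATE_DONE signals a successful decode (the wrapper name and fallback policy are illustrative, not part of this patch):

static int skip_current_insn_example(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	/* Decode only: on success emulate_instruction() has already
	 * written the post-instruction RIP via kvm_rip_write(). */
	if (emulate_instruction(vcpu, run, 0, 0, EMULTYPE_SKIP) != EMULATE_DONE)
		return -1;	/* could not decode; caller picks a fallback */

	return 0;
}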
| @@ -2761,7 +2792,7 @@ int kvm_arch_init(void *opaque) | |||
| 2761 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); | 2792 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); |
| 2762 | kvm_mmu_set_base_ptes(PT_PRESENT_MASK); | 2793 | kvm_mmu_set_base_ptes(PT_PRESENT_MASK); |
| 2763 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | 2794 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, |
| 2764 | PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); | 2795 | PT_DIRTY_MASK, PT64_NX_MASK, 0); |
| 2765 | 2796 | ||
| 2766 | for_each_possible_cpu(cpu) | 2797 | for_each_possible_cpu(cpu) |
| 2767 | per_cpu(cpu_tsc_khz, cpu) = tsc_khz; | 2798 | per_cpu(cpu_tsc_khz, cpu) = tsc_khz; |
| @@ -3012,6 +3043,16 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | |||
| 3012 | return best; | 3043 | return best; |
| 3013 | } | 3044 | } |
| 3014 | 3045 | ||
| 3046 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | ||
| 3047 | { | ||
| 3048 | struct kvm_cpuid_entry2 *best; | ||
| 3049 | |||
| 3050 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | ||
| 3051 | if (best) | ||
| 3052 | return best->eax & 0xff; | ||
| 3053 | return 36; | ||
| 3054 | } | ||
| 3055 | |||
| 3015 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | 3056 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) |
| 3016 | { | 3057 | { |
| 3017 | u32 function, index; | 3058 | u32 function, index; |
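cpuid_maxphyaddr() above reads the guest's CPUID leaf 0x80000008 to find its physical-address width and falls back to 36 bits when the leaf is missing. As a hedged sketch of a typical consumer (the helper name is made up; only cpuid_maxphyaddr() itself comes from this patch), an MMU-style reserved-bit mask for a guest PTE could be derived like this:

static u64 rsvd_bits_above_maxphyaddr(struct kvm_vcpu *vcpu)
{
	int maxphyaddr = cpuid_maxphyaddr(vcpu);	/* e.g. 36, 40 or 48 */

	/* Physical-address bits in a PTE run up to bit 51; everything
	 * from maxphyaddr upward must be zero, so report those bits. */
	return ((1ULL << 52) - 1) & ~((1ULL << maxphyaddr) - 1);
}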
| @@ -3048,10 +3089,9 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | |||
| 3048 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, | 3089 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, |
| 3049 | struct kvm_run *kvm_run) | 3090 | struct kvm_run *kvm_run) |
| 3050 | { | 3091 | { |
| 3051 | return (!vcpu->arch.irq_summary && | 3092 | return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && |
| 3052 | kvm_run->request_interrupt_window && | 3093 | kvm_run->request_interrupt_window && |
| 3053 | vcpu->arch.interrupt_window_open && | 3094 | kvm_arch_interrupt_allowed(vcpu)); |
| 3054 | (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF)); | ||
| 3055 | } | 3095 | } |
| 3056 | 3096 | ||
| 3057 | static void post_kvm_run_save(struct kvm_vcpu *vcpu, | 3097 | static void post_kvm_run_save(struct kvm_vcpu *vcpu, |
| @@ -3064,8 +3104,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu, | |||
| 3064 | kvm_run->ready_for_interrupt_injection = 1; | 3104 | kvm_run->ready_for_interrupt_injection = 1; |
| 3065 | else | 3105 | else |
| 3066 | kvm_run->ready_for_interrupt_injection = | 3106 | kvm_run->ready_for_interrupt_injection = |
| 3067 | (vcpu->arch.interrupt_window_open && | 3107 | kvm_arch_interrupt_allowed(vcpu) && |
| 3068 | vcpu->arch.irq_summary == 0); | 3108 | !kvm_cpu_has_interrupt(vcpu) && |
| 3109 | !kvm_event_needs_reinjection(vcpu); | ||
| 3069 | } | 3110 | } |
| 3070 | 3111 | ||
| 3071 | static void vapic_enter(struct kvm_vcpu *vcpu) | 3112 | static void vapic_enter(struct kvm_vcpu *vcpu) |
| @@ -3094,9 +3135,63 @@ static void vapic_exit(struct kvm_vcpu *vcpu) | |||
| 3094 | up_read(&vcpu->kvm->slots_lock); | 3135 | up_read(&vcpu->kvm->slots_lock); |
| 3095 | } | 3136 | } |
| 3096 | 3137 | ||
| 3138 | static void update_cr8_intercept(struct kvm_vcpu *vcpu) | ||
| 3139 | { | ||
| 3140 | int max_irr, tpr; | ||
| 3141 | |||
| 3142 | if (!kvm_x86_ops->update_cr8_intercept) | ||
| 3143 | return; | ||
| 3144 | |||
| 3145 | if (!vcpu->arch.apic->vapic_addr) | ||
| 3146 | max_irr = kvm_lapic_find_highest_irr(vcpu); | ||
| 3147 | else | ||
| 3148 | max_irr = -1; | ||
| 3149 | |||
| 3150 | if (max_irr != -1) | ||
| 3151 | max_irr >>= 4; | ||
| 3152 | |||
| 3153 | tpr = kvm_lapic_get_cr8(vcpu); | ||
| 3154 | |||
| 3155 | kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); | ||
| 3156 | } | ||
| 3157 | |||
| 3158 | static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
| 3159 | { | ||
| 3160 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
| 3161 | kvm_x86_ops->set_interrupt_shadow(vcpu, 0); | ||
| 3162 | |||
| 3163 | /* try to reinject previous events if any */ | ||
| 3164 | if (vcpu->arch.nmi_injected) { | ||
| 3165 | kvm_x86_ops->set_nmi(vcpu); | ||
| 3166 | return; | ||
| 3167 | } | ||
| 3168 | |||
| 3169 | if (vcpu->arch.interrupt.pending) { | ||
| 3170 | kvm_x86_ops->set_irq(vcpu); | ||
| 3171 | return; | ||
| 3172 | } | ||
| 3173 | |||
| 3174 | /* try to inject new event if pending */ | ||
| 3175 | if (vcpu->arch.nmi_pending) { | ||
| 3176 | if (kvm_x86_ops->nmi_allowed(vcpu)) { | ||
| 3177 | vcpu->arch.nmi_pending = false; | ||
| 3178 | vcpu->arch.nmi_injected = true; | ||
| 3179 | kvm_x86_ops->set_nmi(vcpu); | ||
| 3180 | } | ||
| 3181 | } else if (kvm_cpu_has_interrupt(vcpu)) { | ||
| 3182 | if (kvm_x86_ops->interrupt_allowed(vcpu)) { | ||
| 3183 | kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), | ||
| 3184 | false); | ||
| 3185 | kvm_x86_ops->set_irq(vcpu); | ||
| 3186 | } | ||
| 3187 | } | ||
| 3188 | } | ||
| 3189 | |||
| 3097 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3190 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 3098 | { | 3191 | { |
| 3099 | int r; | 3192 | int r; |
| 3193 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | ||
| 3194 | kvm_run->request_interrupt_window; | ||
| 3100 | 3195 | ||
| 3101 | if (vcpu->requests) | 3196 | if (vcpu->requests) |
| 3102 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) | 3197 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) |
| @@ -3128,9 +3223,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3128 | } | 3223 | } |
| 3129 | } | 3224 | } |
| 3130 | 3225 | ||
| 3131 | clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); | ||
| 3132 | kvm_inject_pending_timer_irqs(vcpu); | ||
| 3133 | |||
| 3134 | preempt_disable(); | 3226 | preempt_disable(); |
| 3135 | 3227 | ||
| 3136 | kvm_x86_ops->prepare_guest_switch(vcpu); | 3228 | kvm_x86_ops->prepare_guest_switch(vcpu); |
| @@ -3138,6 +3230,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3138 | 3230 | ||
| 3139 | local_irq_disable(); | 3231 | local_irq_disable(); |
| 3140 | 3232 | ||
| 3233 | clear_bit(KVM_REQ_KICK, &vcpu->requests); | ||
| 3234 | smp_mb__after_clear_bit(); | ||
| 3235 | |||
| 3141 | if (vcpu->requests || need_resched() || signal_pending(current)) { | 3236 | if (vcpu->requests || need_resched() || signal_pending(current)) { |
| 3142 | local_irq_enable(); | 3237 | local_irq_enable(); |
| 3143 | preempt_enable(); | 3238 | preempt_enable(); |
| @@ -3145,21 +3240,21 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3145 | goto out; | 3240 | goto out; |
| 3146 | } | 3241 | } |
| 3147 | 3242 | ||
| 3148 | vcpu->guest_mode = 1; | ||
| 3149 | /* | ||
| 3150 | * Make sure that guest_mode assignment won't happen after | ||
| 3151 | * testing the pending IRQ vector bitmap. | ||
| 3152 | */ | ||
| 3153 | smp_wmb(); | ||
| 3154 | |||
| 3155 | if (vcpu->arch.exception.pending) | 3243 | if (vcpu->arch.exception.pending) |
| 3156 | __queue_exception(vcpu); | 3244 | __queue_exception(vcpu); |
| 3157 | else if (irqchip_in_kernel(vcpu->kvm)) | ||
| 3158 | kvm_x86_ops->inject_pending_irq(vcpu); | ||
| 3159 | else | 3245 | else |
| 3160 | kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run); | 3246 | inject_pending_irq(vcpu, kvm_run); |
| 3161 | 3247 | ||
| 3162 | kvm_lapic_sync_to_vapic(vcpu); | 3248 | /* enable NMI/IRQ window open exits if needed */ |
| 3249 | if (vcpu->arch.nmi_pending) | ||
| 3250 | kvm_x86_ops->enable_nmi_window(vcpu); | ||
| 3251 | else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) | ||
| 3252 | kvm_x86_ops->enable_irq_window(vcpu); | ||
| 3253 | |||
| 3254 | if (kvm_lapic_enabled(vcpu)) { | ||
| 3255 | update_cr8_intercept(vcpu); | ||
| 3256 | kvm_lapic_sync_to_vapic(vcpu); | ||
| 3257 | } | ||
| 3163 | 3258 | ||
| 3164 | up_read(&vcpu->kvm->slots_lock); | 3259 | up_read(&vcpu->kvm->slots_lock); |
| 3165 | 3260 | ||
| @@ -3193,7 +3288,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3193 | set_debugreg(vcpu->arch.host_dr6, 6); | 3288 | set_debugreg(vcpu->arch.host_dr6, 6); |
| 3194 | set_debugreg(vcpu->arch.host_dr7, 7); | 3289 | set_debugreg(vcpu->arch.host_dr7, 7); |
| 3195 | 3290 | ||
| 3196 | vcpu->guest_mode = 0; | 3291 | set_bit(KVM_REQ_KICK, &vcpu->requests); |
| 3197 | local_irq_enable(); | 3292 | local_irq_enable(); |
| 3198 | 3293 | ||
| 3199 | ++vcpu->stat.exits; | 3294 | ++vcpu->stat.exits; |
| @@ -3220,8 +3315,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3220 | profile_hit(KVM_PROFILING, (void *)rip); | 3315 | profile_hit(KVM_PROFILING, (void *)rip); |
| 3221 | } | 3316 | } |
| 3222 | 3317 | ||
| 3223 | if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu)) | ||
| 3224 | vcpu->arch.exception.pending = false; | ||
| 3225 | 3318 | ||
| 3226 | kvm_lapic_sync_from_vapic(vcpu); | 3319 | kvm_lapic_sync_from_vapic(vcpu); |
| 3227 | 3320 | ||
| @@ -3230,6 +3323,7 @@ out: | |||
| 3230 | return r; | 3323 | return r; |
| 3231 | } | 3324 | } |
| 3232 | 3325 | ||
| 3326 | |||
| 3233 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3327 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 3234 | { | 3328 | { |
| 3235 | int r; | 3329 | int r; |
| @@ -3256,29 +3350,42 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3256 | kvm_vcpu_block(vcpu); | 3350 | kvm_vcpu_block(vcpu); |
| 3257 | down_read(&vcpu->kvm->slots_lock); | 3351 | down_read(&vcpu->kvm->slots_lock); |
| 3258 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) | 3352 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) |
| 3259 | if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) | 3353 | { |
| 3354 | switch(vcpu->arch.mp_state) { | ||
| 3355 | case KVM_MP_STATE_HALTED: | ||
| 3260 | vcpu->arch.mp_state = | 3356 | vcpu->arch.mp_state = |
| 3261 | KVM_MP_STATE_RUNNABLE; | 3357 | KVM_MP_STATE_RUNNABLE; |
| 3262 | if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) | 3358 | case KVM_MP_STATE_RUNNABLE: |
| 3263 | r = -EINTR; | 3359 | break; |
| 3360 | case KVM_MP_STATE_SIPI_RECEIVED: | ||
| 3361 | default: | ||
| 3362 | r = -EINTR; | ||
| 3363 | break; | ||
| 3364 | } | ||
| 3365 | } | ||
| 3264 | } | 3366 | } |
| 3265 | 3367 | ||
| 3266 | if (r > 0) { | 3368 | if (r <= 0) |
| 3267 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { | 3369 | break; |
| 3268 | r = -EINTR; | 3370 | |
| 3269 | kvm_run->exit_reason = KVM_EXIT_INTR; | 3371 | clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); |
| 3270 | ++vcpu->stat.request_irq_exits; | 3372 | if (kvm_cpu_has_pending_timer(vcpu)) |
| 3271 | } | 3373 | kvm_inject_pending_timer_irqs(vcpu); |
| 3272 | if (signal_pending(current)) { | 3374 | |
| 3273 | r = -EINTR; | 3375 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { |
| 3274 | kvm_run->exit_reason = KVM_EXIT_INTR; | 3376 | r = -EINTR; |
| 3275 | ++vcpu->stat.signal_exits; | 3377 | kvm_run->exit_reason = KVM_EXIT_INTR; |
| 3276 | } | 3378 | ++vcpu->stat.request_irq_exits; |
| 3277 | if (need_resched()) { | 3379 | } |
| 3278 | up_read(&vcpu->kvm->slots_lock); | 3380 | if (signal_pending(current)) { |
| 3279 | kvm_resched(vcpu); | 3381 | r = -EINTR; |
| 3280 | down_read(&vcpu->kvm->slots_lock); | 3382 | kvm_run->exit_reason = KVM_EXIT_INTR; |
| 3281 | } | 3383 | ++vcpu->stat.signal_exits; |
| 3384 | } | ||
| 3385 | if (need_resched()) { | ||
| 3386 | up_read(&vcpu->kvm->slots_lock); | ||
| 3387 | kvm_resched(vcpu); | ||
| 3388 | down_read(&vcpu->kvm->slots_lock); | ||
| 3282 | } | 3389 | } |
| 3283 | } | 3390 | } |
| 3284 | 3391 | ||
| @@ -3442,7 +3549,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
| 3442 | struct kvm_sregs *sregs) | 3549 | struct kvm_sregs *sregs) |
| 3443 | { | 3550 | { |
| 3444 | struct descriptor_table dt; | 3551 | struct descriptor_table dt; |
| 3445 | int pending_vec; | ||
| 3446 | 3552 | ||
| 3447 | vcpu_load(vcpu); | 3553 | vcpu_load(vcpu); |
| 3448 | 3554 | ||
| @@ -3472,16 +3578,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
| 3472 | sregs->efer = vcpu->arch.shadow_efer; | 3578 | sregs->efer = vcpu->arch.shadow_efer; |
| 3473 | sregs->apic_base = kvm_get_apic_base(vcpu); | 3579 | sregs->apic_base = kvm_get_apic_base(vcpu); |
| 3474 | 3580 | ||
| 3475 | if (irqchip_in_kernel(vcpu->kvm)) { | 3581 | memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); |
| 3476 | memset(sregs->interrupt_bitmap, 0, | 3582 | |
| 3477 | sizeof sregs->interrupt_bitmap); | 3583 | if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft) |
| 3478 | pending_vec = kvm_x86_ops->get_irq(vcpu); | 3584 | set_bit(vcpu->arch.interrupt.nr, |
| 3479 | if (pending_vec >= 0) | 3585 | (unsigned long *)sregs->interrupt_bitmap); |
| 3480 | set_bit(pending_vec, | ||
| 3481 | (unsigned long *)sregs->interrupt_bitmap); | ||
| 3482 | } else | ||
| 3483 | memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending, | ||
| 3484 | sizeof sregs->interrupt_bitmap); | ||
| 3485 | 3586 | ||
| 3486 | vcpu_put(vcpu); | 3587 | vcpu_put(vcpu); |
| 3487 | 3588 | ||
| @@ -3688,7 +3789,6 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, | |||
| 3688 | tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); | 3789 | tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); |
| 3689 | tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); | 3790 | tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); |
| 3690 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); | 3791 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); |
| 3691 | tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
| 3692 | } | 3792 | } |
| 3693 | 3793 | ||
| 3694 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, | 3794 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, |
| @@ -3785,8 +3885,8 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, | |||
| 3785 | } | 3885 | } |
| 3786 | 3886 | ||
| 3787 | static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | 3887 | static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, |
| 3788 | u32 old_tss_base, | 3888 | u16 old_tss_sel, u32 old_tss_base, |
| 3789 | struct desc_struct *nseg_desc) | 3889 | struct desc_struct *nseg_desc) |
| 3790 | { | 3890 | { |
| 3791 | struct tss_segment_16 tss_segment_16; | 3891 | struct tss_segment_16 tss_segment_16; |
| 3792 | int ret = 0; | 3892 | int ret = 0; |
| @@ -3805,6 +3905,16 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
| 3805 | &tss_segment_16, sizeof tss_segment_16)) | 3905 | &tss_segment_16, sizeof tss_segment_16)) |
| 3806 | goto out; | 3906 | goto out; |
| 3807 | 3907 | ||
| 3908 | if (old_tss_sel != 0xffff) { | ||
| 3909 | tss_segment_16.prev_task_link = old_tss_sel; | ||
| 3910 | |||
| 3911 | if (kvm_write_guest(vcpu->kvm, | ||
| 3912 | get_tss_base_addr(vcpu, nseg_desc), | ||
| 3913 | &tss_segment_16.prev_task_link, | ||
| 3914 | sizeof tss_segment_16.prev_task_link)) | ||
| 3915 | goto out; | ||
| 3916 | } | ||
| 3917 | |||
| 3808 | if (load_state_from_tss16(vcpu, &tss_segment_16)) | 3918 | if (load_state_from_tss16(vcpu, &tss_segment_16)) |
| 3809 | goto out; | 3919 | goto out; |
| 3810 | 3920 | ||
| @@ -3814,7 +3924,7 @@ out: | |||
| 3814 | } | 3924 | } |
| 3815 | 3925 | ||
| 3816 | static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | 3926 | static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, |
| 3817 | u32 old_tss_base, | 3927 | u16 old_tss_sel, u32 old_tss_base, |
| 3818 | struct desc_struct *nseg_desc) | 3928 | struct desc_struct *nseg_desc) |
| 3819 | { | 3929 | { |
| 3820 | struct tss_segment_32 tss_segment_32; | 3930 | struct tss_segment_32 tss_segment_32; |
| @@ -3834,6 +3944,16 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
| 3834 | &tss_segment_32, sizeof tss_segment_32)) | 3944 | &tss_segment_32, sizeof tss_segment_32)) |
| 3835 | goto out; | 3945 | goto out; |
| 3836 | 3946 | ||
| 3947 | if (old_tss_sel != 0xffff) { | ||
| 3948 | tss_segment_32.prev_task_link = old_tss_sel; | ||
| 3949 | |||
| 3950 | if (kvm_write_guest(vcpu->kvm, | ||
| 3951 | get_tss_base_addr(vcpu, nseg_desc), | ||
| 3952 | &tss_segment_32.prev_task_link, | ||
| 3953 | sizeof tss_segment_32.prev_task_link)) | ||
| 3954 | goto out; | ||
| 3955 | } | ||
| 3956 | |||
| 3837 | if (load_state_from_tss32(vcpu, &tss_segment_32)) | 3957 | if (load_state_from_tss32(vcpu, &tss_segment_32)) |
| 3838 | goto out; | 3958 | goto out; |
| 3839 | 3959 | ||
| @@ -3887,14 +4007,22 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
| 3887 | kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); | 4007 | kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); |
| 3888 | } | 4008 | } |
| 3889 | 4009 | ||
| 3890 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 4010 | /* set back link to prev task only if NT bit is set in eflags |
| 4011 | note that old_tss_sel is not used after this point */ | ||
| 4012 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
| 4013 | old_tss_sel = 0xffff; | ||
| 4014 | |||
| 4015 | /* set back link to prev task only if NT bit is set in eflags | ||
| 4016 | note that old_tss_sel is not used after this point */ | ||
| 4017 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
| 4018 | old_tss_sel = 0xffff; | ||
| 3891 | 4019 | ||
| 3892 | if (nseg_desc.type & 8) | 4020 | if (nseg_desc.type & 8) |
| 3893 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, | 4021 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, |
| 3894 | &nseg_desc); | 4022 | old_tss_base, &nseg_desc); |
| 3895 | else | 4023 | else |
| 3896 | ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base, | 4024 | ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel, |
| 3897 | &nseg_desc); | 4025 | old_tss_base, &nseg_desc); |
| 3898 | 4026 | ||
| 3899 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { | 4027 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { |
| 3900 | u32 eflags = kvm_x86_ops->get_rflags(vcpu); | 4028 | u32 eflags = kvm_x86_ops->get_rflags(vcpu); |
| @@ -3920,7 +4048,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 3920 | struct kvm_sregs *sregs) | 4048 | struct kvm_sregs *sregs) |
| 3921 | { | 4049 | { |
| 3922 | int mmu_reset_needed = 0; | 4050 | int mmu_reset_needed = 0; |
| 3923 | int i, pending_vec, max_bits; | 4051 | int pending_vec, max_bits; |
| 3924 | struct descriptor_table dt; | 4052 | struct descriptor_table dt; |
| 3925 | 4053 | ||
| 3926 | vcpu_load(vcpu); | 4054 | vcpu_load(vcpu); |
| @@ -3934,7 +4062,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 3934 | 4062 | ||
| 3935 | vcpu->arch.cr2 = sregs->cr2; | 4063 | vcpu->arch.cr2 = sregs->cr2; |
| 3936 | mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; | 4064 | mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; |
| 3937 | vcpu->arch.cr3 = sregs->cr3; | 4065 | |
| 4066 | down_read(&vcpu->kvm->slots_lock); | ||
| 4067 | if (gfn_to_memslot(vcpu->kvm, sregs->cr3 >> PAGE_SHIFT)) | ||
| 4068 | vcpu->arch.cr3 = sregs->cr3; | ||
| 4069 | else | ||
| 4070 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | ||
| 4071 | up_read(&vcpu->kvm->slots_lock); | ||
| 3938 | 4072 | ||
| 3939 | kvm_set_cr8(vcpu, sregs->cr8); | 4073 | kvm_set_cr8(vcpu, sregs->cr8); |
| 3940 | 4074 | ||
| @@ -3956,25 +4090,14 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 3956 | if (mmu_reset_needed) | 4090 | if (mmu_reset_needed) |
| 3957 | kvm_mmu_reset_context(vcpu); | 4091 | kvm_mmu_reset_context(vcpu); |
| 3958 | 4092 | ||
| 3959 | if (!irqchip_in_kernel(vcpu->kvm)) { | 4093 | max_bits = (sizeof sregs->interrupt_bitmap) << 3; |
| 3960 | memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap, | 4094 | pending_vec = find_first_bit( |
| 3961 | sizeof vcpu->arch.irq_pending); | 4095 | (const unsigned long *)sregs->interrupt_bitmap, max_bits); |
| 3962 | vcpu->arch.irq_summary = 0; | 4096 | if (pending_vec < max_bits) { |
| 3963 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i) | 4097 | kvm_queue_interrupt(vcpu, pending_vec, false); |
| 3964 | if (vcpu->arch.irq_pending[i]) | 4098 | pr_debug("Set back pending irq %d\n", pending_vec); |
| 3965 | __set_bit(i, &vcpu->arch.irq_summary); | 4099 | if (irqchip_in_kernel(vcpu->kvm)) |
| 3966 | } else { | 4100 | kvm_pic_clear_isr_ack(vcpu->kvm); |
| 3967 | max_bits = (sizeof sregs->interrupt_bitmap) << 3; | ||
| 3968 | pending_vec = find_first_bit( | ||
| 3969 | (const unsigned long *)sregs->interrupt_bitmap, | ||
| 3970 | max_bits); | ||
| 3971 | /* Only pending external irq is handled here */ | ||
| 3972 | if (pending_vec < max_bits) { | ||
| 3973 | kvm_x86_ops->set_irq(vcpu, pending_vec); | ||
| 3974 | pr_debug("Set back pending irq %d\n", | ||
| 3975 | pending_vec); | ||
| 3976 | } | ||
| 3977 | kvm_pic_clear_isr_ack(vcpu->kvm); | ||
| 3978 | } | 4101 | } |
| 3979 | 4102 | ||
| 3980 | kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | 4103 | kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); |
| @@ -4308,7 +4431,6 @@ struct kvm *kvm_arch_create_vm(void) | |||
| 4308 | return ERR_PTR(-ENOMEM); | 4431 | return ERR_PTR(-ENOMEM); |
| 4309 | 4432 | ||
| 4310 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 4433 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
| 4311 | INIT_LIST_HEAD(&kvm->arch.oos_global_pages); | ||
| 4312 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 4434 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
| 4313 | 4435 | ||
| 4314 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ | 4436 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ |
| @@ -4411,12 +4533,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
| 4411 | } | 4533 | } |
| 4412 | } | 4534 | } |
| 4413 | 4535 | ||
| 4536 | spin_lock(&kvm->mmu_lock); | ||
| 4414 | if (!kvm->arch.n_requested_mmu_pages) { | 4537 | if (!kvm->arch.n_requested_mmu_pages) { |
| 4415 | unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); | 4538 | unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); |
| 4416 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); | 4539 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); |
| 4417 | } | 4540 | } |
| 4418 | 4541 | ||
| 4419 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 4542 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
| 4543 | spin_unlock(&kvm->mmu_lock); | ||
| 4420 | kvm_flush_remote_tlbs(kvm); | 4544 | kvm_flush_remote_tlbs(kvm); |
| 4421 | 4545 | ||
| 4422 | return 0; | 4546 | return 0; |
| @@ -4425,6 +4549,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
| 4425 | void kvm_arch_flush_shadow(struct kvm *kvm) | 4549 | void kvm_arch_flush_shadow(struct kvm *kvm) |
| 4426 | { | 4550 | { |
| 4427 | kvm_mmu_zap_all(kvm); | 4551 | kvm_mmu_zap_all(kvm); |
| 4552 | kvm_reload_remote_mmus(kvm); | ||
| 4428 | } | 4553 | } |
| 4429 | 4554 | ||
| 4430 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | 4555 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) |
| @@ -4434,28 +4559,24 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
| 4434 | || vcpu->arch.nmi_pending; | 4559 | || vcpu->arch.nmi_pending; |
| 4435 | } | 4560 | } |
| 4436 | 4561 | ||
| 4437 | static void vcpu_kick_intr(void *info) | ||
| 4438 | { | ||
| 4439 | #ifdef DEBUG | ||
| 4440 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; | ||
| 4441 | printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu); | ||
| 4442 | #endif | ||
| 4443 | } | ||
| 4444 | |||
| 4445 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | 4562 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu) |
| 4446 | { | 4563 | { |
| 4447 | int ipi_pcpu = vcpu->cpu; | 4564 | int me; |
| 4448 | int cpu = get_cpu(); | 4565 | int cpu = vcpu->cpu; |
| 4449 | 4566 | ||
| 4450 | if (waitqueue_active(&vcpu->wq)) { | 4567 | if (waitqueue_active(&vcpu->wq)) { |
| 4451 | wake_up_interruptible(&vcpu->wq); | 4568 | wake_up_interruptible(&vcpu->wq); |
| 4452 | ++vcpu->stat.halt_wakeup; | 4569 | ++vcpu->stat.halt_wakeup; |
| 4453 | } | 4570 | } |
| 4454 | /* | 4571 | |
| 4455 | * We may be called synchronously with irqs disabled in guest mode, | 4572 | me = get_cpu(); |
| 4456 | * So need not to call smp_call_function_single() in that case. | 4573 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) |
| 4457 | */ | 4574 | if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) |
| 4458 | if (vcpu->guest_mode && vcpu->cpu != cpu) | 4575 | smp_send_reschedule(cpu); |
| 4459 | smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0); | ||
| 4460 | put_cpu(); | 4576 | put_cpu(); |
| 4461 | } | 4577 | } |
| 4578 | |||
| 4579 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) | ||
| 4580 | { | ||
| 4581 | return kvm_x86_ops->interrupt_allowed(vcpu); | ||
| 4582 | } | ||
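Taken together, the KVM_REQ_KICK changes replace the old vcpu->guest_mode flag and empty-IPI trick: vcpu_enter_guest() clears the bit with interrupts disabled right before entry and re-checks vcpu->requests, while kvm_vcpu_kick() sends a reschedule IPI only when it is the first to set the bit for a vcpu running on another cpu. A minimal sketch of the producer side, using only helpers visible in this patch (the wrapper itself is hypothetical):

static void inject_and_kick_example(struct kvm_vcpu *vcpu, u8 vector)
{
	/* Queue a hard (external) interrupt for the target vcpu ... */
	kvm_queue_interrupt(vcpu, vector, false);

	/* ... then force it out of guest mode if it is already running.
	 * If KVM_REQ_KICK was still clear, kvm_vcpu_kick() sends
	 * smp_send_reschedule(); otherwise the vcpu will notice the
	 * pending request itself before entering the guest. */
	kvm_vcpu_kick(vcpu);
}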
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 6a4be78a7384..4c8e10af78e8 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
| @@ -8,9 +8,11 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) | |||
| 8 | vcpu->arch.exception.pending = false; | 8 | vcpu->arch.exception.pending = false; |
| 9 | } | 9 | } |
| 10 | 10 | ||
| 11 | static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector) | 11 | static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector, |
| 12 | bool soft) | ||
| 12 | { | 13 | { |
| 13 | vcpu->arch.interrupt.pending = true; | 14 | vcpu->arch.interrupt.pending = true; |
| 15 | vcpu->arch.interrupt.soft = soft; | ||
| 14 | vcpu->arch.interrupt.nr = vector; | 16 | vcpu->arch.interrupt.nr = vector; |
| 15 | } | 17 | } |
| 16 | 18 | ||
| @@ -19,4 +21,14 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu) | |||
| 19 | vcpu->arch.interrupt.pending = false; | 21 | vcpu->arch.interrupt.pending = false; |
| 20 | } | 22 | } |
| 21 | 23 | ||
| 24 | static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu) | ||
| 25 | { | ||
| 26 | return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending || | ||
| 27 | vcpu->arch.nmi_injected; | ||
| 28 | } | ||
| 29 | |||
| 30 | static inline bool kvm_exception_is_soft(unsigned int nr) | ||
| 31 | { | ||
| 32 | return (nr == BP_VECTOR) || (nr == OF_VECTOR); | ||
| 33 | } | ||
| 22 | #endif | 34 | #endif |
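The new soft argument to kvm_queue_interrupt() and the kvm_exception_is_soft() helper let later re-injection code distinguish software-generated events (INTn, #BP from int3, #OF from into), which must be replayed together with an instruction length, from hardware ones. A sketch only; the helper below is hypothetical, and the exception.nr / interrupt.soft fields are assumed from the existing vcpu->arch layout rather than shown in this hunk:

static bool pending_event_is_soft_example(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.exception.pending)
		return kvm_exception_is_soft(vcpu->arch.exception.nr);

	if (vcpu->arch.interrupt.pending)
		return vcpu->arch.interrupt.soft;

	return false;	/* nothing queued (NMIs are never soft) */
}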
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index ca91749d2083..c1b6c232e02b 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c | |||
| @@ -59,13 +59,14 @@ | |||
| 59 | #define SrcImm (5<<4) /* Immediate operand. */ | 59 | #define SrcImm (5<<4) /* Immediate operand. */ |
| 60 | #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ | 60 | #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ |
| 61 | #define SrcOne (7<<4) /* Implied '1' */ | 61 | #define SrcOne (7<<4) /* Implied '1' */ |
| 62 | #define SrcMask (7<<4) | 62 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ |
| 63 | #define SrcMask (0xf<<4) | ||
| 63 | /* Generic ModRM decode. */ | 64 | /* Generic ModRM decode. */ |
| 64 | #define ModRM (1<<7) | 65 | #define ModRM (1<<8) |
| 65 | /* Destination is only written; never read. */ | 66 | /* Destination is only written; never read. */ |
| 66 | #define Mov (1<<8) | 67 | #define Mov (1<<9) |
| 67 | #define BitOp (1<<9) | 68 | #define BitOp (1<<10) |
| 68 | #define MemAbs (1<<10) /* Memory operand is absolute displacement */ | 69 | #define MemAbs (1<<11) /* Memory operand is absolute displacement */ |
| 69 | #define String (1<<12) /* String instruction (rep capable) */ | 70 | #define String (1<<12) /* String instruction (rep capable) */ |
| 70 | #define Stack (1<<13) /* Stack instruction (push/pop) */ | 71 | #define Stack (1<<13) /* Stack instruction (push/pop) */ |
| 71 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ | 72 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ |
| @@ -76,6 +77,7 @@ | |||
| 76 | #define Src2CL (1<<29) | 77 | #define Src2CL (1<<29) |
| 77 | #define Src2ImmByte (2<<29) | 78 | #define Src2ImmByte (2<<29) |
| 78 | #define Src2One (3<<29) | 79 | #define Src2One (3<<29) |
| 80 | #define Src2Imm16 (4<<29) | ||
| 79 | #define Src2Mask (7<<29) | 81 | #define Src2Mask (7<<29) |
| 80 | 82 | ||
| 81 | enum { | 83 | enum { |
| @@ -135,11 +137,11 @@ static u32 opcode_table[256] = { | |||
| 135 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ | 137 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ |
| 136 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ | 138 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ |
| 137 | /* 0x70 - 0x77 */ | 139 | /* 0x70 - 0x77 */ |
| 138 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | 140 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
| 139 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | 141 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
| 140 | /* 0x78 - 0x7F */ | 142 | /* 0x78 - 0x7F */ |
| 141 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | 143 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
| 142 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | 144 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
| 143 | /* 0x80 - 0x87 */ | 145 | /* 0x80 - 0x87 */ |
| 144 | Group | Group1_80, Group | Group1_81, | 146 | Group | Group1_80, Group | Group1_81, |
| 145 | Group | Group1_82, Group | Group1_83, | 147 | Group | Group1_82, Group | Group1_83, |
| @@ -153,7 +155,8 @@ static u32 opcode_table[256] = { | |||
| 153 | /* 0x90 - 0x97 */ | 155 | /* 0x90 - 0x97 */ |
| 154 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, | 156 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, |
| 155 | /* 0x98 - 0x9F */ | 157 | /* 0x98 - 0x9F */ |
| 156 | 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, | 158 | 0, 0, SrcImm | Src2Imm16, 0, |
| 159 | ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, | ||
| 157 | /* 0xA0 - 0xA7 */ | 160 | /* 0xA0 - 0xA7 */ |
| 158 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, | 161 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, |
| 159 | ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, | 162 | ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, |
| @@ -178,7 +181,8 @@ static u32 opcode_table[256] = { | |||
| 178 | 0, ImplicitOps | Stack, 0, 0, | 181 | 0, ImplicitOps | Stack, 0, 0, |
| 179 | ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, | 182 | ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, |
| 180 | /* 0xC8 - 0xCF */ | 183 | /* 0xC8 - 0xCF */ |
| 181 | 0, 0, 0, ImplicitOps | Stack, 0, 0, 0, 0, | 184 | 0, 0, 0, ImplicitOps | Stack, |
| 185 | ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps, | ||
| 182 | /* 0xD0 - 0xD7 */ | 186 | /* 0xD0 - 0xD7 */ |
| 183 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, | 187 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, |
| 184 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, | 188 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, |
| @@ -187,11 +191,11 @@ static u32 opcode_table[256] = { | |||
| 187 | 0, 0, 0, 0, 0, 0, 0, 0, | 191 | 0, 0, 0, 0, 0, 0, 0, 0, |
| 188 | /* 0xE0 - 0xE7 */ | 192 | /* 0xE0 - 0xE7 */ |
| 189 | 0, 0, 0, 0, | 193 | 0, 0, 0, 0, |
| 190 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 194 | ByteOp | SrcImmUByte, SrcImmUByte, |
| 191 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 195 | ByteOp | SrcImmUByte, SrcImmUByte, |
| 192 | /* 0xE8 - 0xEF */ | 196 | /* 0xE8 - 0xEF */ |
| 193 | ImplicitOps | Stack, SrcImm | ImplicitOps, | 197 | SrcImm | Stack, SrcImm | ImplicitOps, |
| 194 | ImplicitOps, SrcImmByte | ImplicitOps, | 198 | SrcImm | Src2Imm16, SrcImmByte | ImplicitOps, |
| 195 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 199 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, |
| 196 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 200 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, |
| 197 | /* 0xF0 - 0xF7 */ | 201 | /* 0xF0 - 0xF7 */ |
| @@ -230,10 +234,8 @@ static u32 twobyte_table[256] = { | |||
| 230 | /* 0x70 - 0x7F */ | 234 | /* 0x70 - 0x7F */ |
| 231 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 235 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 232 | /* 0x80 - 0x8F */ | 236 | /* 0x80 - 0x8F */ |
| 233 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | 237 | SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, |
| 234 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | 238 | SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, |
| 235 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | ||
| 236 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | ||
| 237 | /* 0x90 - 0x9F */ | 239 | /* 0x90 - 0x9F */ |
| 238 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 240 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 239 | /* 0xA0 - 0xA7 */ | 241 | /* 0xA0 - 0xA7 */ |
| @@ -1044,10 +1046,14 @@ done_prefixes: | |||
| 1044 | } | 1046 | } |
| 1045 | break; | 1047 | break; |
| 1046 | case SrcImmByte: | 1048 | case SrcImmByte: |
| 1049 | case SrcImmUByte: | ||
| 1047 | c->src.type = OP_IMM; | 1050 | c->src.type = OP_IMM; |
| 1048 | c->src.ptr = (unsigned long *)c->eip; | 1051 | c->src.ptr = (unsigned long *)c->eip; |
| 1049 | c->src.bytes = 1; | 1052 | c->src.bytes = 1; |
| 1050 | c->src.val = insn_fetch(s8, 1, c->eip); | 1053 | if ((c->d & SrcMask) == SrcImmByte) |
| 1054 | c->src.val = insn_fetch(s8, 1, c->eip); | ||
| 1055 | else | ||
| 1056 | c->src.val = insn_fetch(u8, 1, c->eip); | ||
| 1051 | break; | 1057 | break; |
| 1052 | case SrcOne: | 1058 | case SrcOne: |
| 1053 | c->src.bytes = 1; | 1059 | c->src.bytes = 1; |
| @@ -1072,6 +1078,12 @@ done_prefixes: | |||
| 1072 | c->src2.bytes = 1; | 1078 | c->src2.bytes = 1; |
| 1073 | c->src2.val = insn_fetch(u8, 1, c->eip); | 1079 | c->src2.val = insn_fetch(u8, 1, c->eip); |
| 1074 | break; | 1080 | break; |
| 1081 | case Src2Imm16: | ||
| 1082 | c->src2.type = OP_IMM; | ||
| 1083 | c->src2.ptr = (unsigned long *)c->eip; | ||
| 1084 | c->src2.bytes = 2; | ||
| 1085 | c->src2.val = insn_fetch(u16, 2, c->eip); | ||
| 1086 | break; | ||
| 1075 | case Src2One: | 1087 | case Src2One: |
| 1076 | c->src2.bytes = 1; | 1088 | c->src2.bytes = 1; |
| 1077 | c->src2.val = 1; | 1089 | c->src2.val = 1; |
| @@ -1349,6 +1361,20 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
| 1349 | return 0; | 1361 | return 0; |
| 1350 | } | 1362 | } |
| 1351 | 1363 | ||
| 1364 | void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) | ||
| 1365 | { | ||
| 1366 | u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask); | ||
| 1367 | /* | ||
| 1368 | * an sti; sti; sequence only disables interrupts for the first | ||
| 1369 | * instruction. So, if the last instruction, be it emulated or | ||
| 1370 | * not, left the system with the INT_STI flag enabled, it | ||
| 1371 | * means that the last instruction is an sti. We should not | ||
| 1372 | * leave the flag on in this case. The same goes for mov ss | ||
| 1373 | */ | ||
| 1374 | if (!(int_shadow & mask)) | ||
| 1375 | ctxt->interruptibility = mask; | ||
| 1376 | } | ||
| 1377 | |||
| 1352 | int | 1378 | int |
| 1353 | x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | 1379 | x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
| 1354 | { | 1380 | { |
| @@ -1360,6 +1386,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 1360 | int io_dir_in; | 1386 | int io_dir_in; |
| 1361 | int rc = 0; | 1387 | int rc = 0; |
| 1362 | 1388 | ||
| 1389 | ctxt->interruptibility = 0; | ||
| 1390 | |||
| 1363 | /* Shadow copy of register state. Committed on successful emulation. | 1391 | /* Shadow copy of register state. Committed on successful emulation. |
| 1364 | * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't | 1392 | * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't |
| 1365 | * modify them. | 1393 | * modify them. |
| @@ -1531,13 +1559,10 @@ special_insn: | |||
| 1531 | return -1; | 1559 | return -1; |
| 1532 | } | 1560 | } |
| 1533 | return 0; | 1561 | return 0; |
| 1534 | case 0x70 ... 0x7f: /* jcc (short) */ { | 1562 | case 0x70 ... 0x7f: /* jcc (short) */ |
| 1535 | int rel = insn_fetch(s8, 1, c->eip); | ||
| 1536 | |||
| 1537 | if (test_cc(c->b, ctxt->eflags)) | 1563 | if (test_cc(c->b, ctxt->eflags)) |
| 1538 | jmp_rel(c, rel); | 1564 | jmp_rel(c, c->src.val); |
| 1539 | break; | 1565 | break; |
| 1540 | } | ||
| 1541 | case 0x80 ... 0x83: /* Grp1 */ | 1566 | case 0x80 ... 0x83: /* Grp1 */ |
| 1542 | switch (c->modrm_reg) { | 1567 | switch (c->modrm_reg) { |
| 1543 | case 0: | 1568 | case 0: |
| @@ -1609,6 +1634,9 @@ special_insn: | |||
| 1609 | int err; | 1634 | int err; |
| 1610 | 1635 | ||
| 1611 | sel = c->src.val; | 1636 | sel = c->src.val; |
| 1637 | if (c->modrm_reg == VCPU_SREG_SS) | ||
| 1638 | toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); | ||
| 1639 | |||
| 1612 | if (c->modrm_reg <= 5) { | 1640 | if (c->modrm_reg <= 5) { |
| 1613 | type_bits = (c->modrm_reg == 1) ? 9 : 1; | 1641 | type_bits = (c->modrm_reg == 1) ? 9 : 1; |
| 1614 | err = kvm_load_segment_descriptor(ctxt->vcpu, sel, | 1642 | err = kvm_load_segment_descriptor(ctxt->vcpu, sel, |
| @@ -1769,59 +1797,32 @@ special_insn: | |||
| 1769 | break; | 1797 | break; |
| 1770 | case 0xe4: /* inb */ | 1798 | case 0xe4: /* inb */ |
| 1771 | case 0xe5: /* in */ | 1799 | case 0xe5: /* in */ |
| 1772 | port = insn_fetch(u8, 1, c->eip); | 1800 | port = c->src.val; |
| 1773 | io_dir_in = 1; | 1801 | io_dir_in = 1; |
| 1774 | goto do_io; | 1802 | goto do_io; |
| 1775 | case 0xe6: /* outb */ | 1803 | case 0xe6: /* outb */ |
| 1776 | case 0xe7: /* out */ | 1804 | case 0xe7: /* out */ |
| 1777 | port = insn_fetch(u8, 1, c->eip); | 1805 | port = c->src.val; |
| 1778 | io_dir_in = 0; | 1806 | io_dir_in = 0; |
| 1779 | goto do_io; | 1807 | goto do_io; |
| 1780 | case 0xe8: /* call (near) */ { | 1808 | case 0xe8: /* call (near) */ { |
| 1781 | long int rel; | 1809 | long int rel = c->src.val; |
| 1782 | switch (c->op_bytes) { | ||
| 1783 | case 2: | ||
| 1784 | rel = insn_fetch(s16, 2, c->eip); | ||
| 1785 | break; | ||
| 1786 | case 4: | ||
| 1787 | rel = insn_fetch(s32, 4, c->eip); | ||
| 1788 | break; | ||
| 1789 | default: | ||
| 1790 | DPRINTF("Call: Invalid op_bytes\n"); | ||
| 1791 | goto cannot_emulate; | ||
| 1792 | } | ||
| 1793 | c->src.val = (unsigned long) c->eip; | 1810 | c->src.val = (unsigned long) c->eip; |
| 1794 | jmp_rel(c, rel); | 1811 | jmp_rel(c, rel); |
| 1795 | c->op_bytes = c->ad_bytes; | ||
| 1796 | emulate_push(ctxt); | 1812 | emulate_push(ctxt); |
| 1797 | break; | 1813 | break; |
| 1798 | } | 1814 | } |
| 1799 | case 0xe9: /* jmp rel */ | 1815 | case 0xe9: /* jmp rel */ |
| 1800 | goto jmp; | 1816 | goto jmp; |
| 1801 | case 0xea: /* jmp far */ { | 1817 | case 0xea: /* jmp far */ |
| 1802 | uint32_t eip; | 1818 | if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9, |
| 1803 | uint16_t sel; | 1819 | VCPU_SREG_CS) < 0) { |
| 1804 | |||
| 1805 | switch (c->op_bytes) { | ||
| 1806 | case 2: | ||
| 1807 | eip = insn_fetch(u16, 2, c->eip); | ||
| 1808 | break; | ||
| 1809 | case 4: | ||
| 1810 | eip = insn_fetch(u32, 4, c->eip); | ||
| 1811 | break; | ||
| 1812 | default: | ||
| 1813 | DPRINTF("jmp far: Invalid op_bytes\n"); | ||
| 1814 | goto cannot_emulate; | ||
| 1815 | } | ||
| 1816 | sel = insn_fetch(u16, 2, c->eip); | ||
| 1817 | if (kvm_load_segment_descriptor(ctxt->vcpu, sel, 9, VCPU_SREG_CS) < 0) { | ||
| 1818 | DPRINTF("jmp far: Failed to load CS descriptor\n"); | 1820 | DPRINTF("jmp far: Failed to load CS descriptor\n"); |
| 1819 | goto cannot_emulate; | 1821 | goto cannot_emulate; |
| 1820 | } | 1822 | } |
| 1821 | 1823 | ||
| 1822 | c->eip = eip; | 1824 | c->eip = c->src.val; |
| 1823 | break; | 1825 | break; |
| 1824 | } | ||
| 1825 | case 0xeb: | 1826 | case 0xeb: |
| 1826 | jmp: /* jmp rel short */ | 1827 | jmp: /* jmp rel short */ |
| 1827 | jmp_rel(c, c->src.val); | 1828 | jmp_rel(c, c->src.val); |
| @@ -1865,6 +1866,7 @@ special_insn: | |||
| 1865 | c->dst.type = OP_NONE; /* Disable writeback. */ | 1866 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 1866 | break; | 1867 | break; |
| 1867 | case 0xfb: /* sti */ | 1868 | case 0xfb: /* sti */ |
| 1869 | toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); | ||
| 1868 | ctxt->eflags |= X86_EFLAGS_IF; | 1870 | ctxt->eflags |= X86_EFLAGS_IF; |
| 1869 | c->dst.type = OP_NONE; /* Disable writeback. */ | 1871 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 1870 | break; | 1872 | break; |
| @@ -2039,28 +2041,11 @@ twobyte_insn: | |||
| 2039 | if (!test_cc(c->b, ctxt->eflags)) | 2041 | if (!test_cc(c->b, ctxt->eflags)) |
| 2040 | c->dst.type = OP_NONE; /* no writeback */ | 2042 | c->dst.type = OP_NONE; /* no writeback */ |
| 2041 | break; | 2043 | break; |
| 2042 | case 0x80 ... 0x8f: /* jnz rel, etc*/ { | 2044 | case 0x80 ... 0x8f: /* jnz rel, etc*/ |
| 2043 | long int rel; | ||
| 2044 | |||
| 2045 | switch (c->op_bytes) { | ||
| 2046 | case 2: | ||
| 2047 | rel = insn_fetch(s16, 2, c->eip); | ||
| 2048 | break; | ||
| 2049 | case 4: | ||
| 2050 | rel = insn_fetch(s32, 4, c->eip); | ||
| 2051 | break; | ||
| 2052 | case 8: | ||
| 2053 | rel = insn_fetch(s64, 8, c->eip); | ||
| 2054 | break; | ||
| 2055 | default: | ||
| 2056 | DPRINTF("jnz: Invalid op_bytes\n"); | ||
| 2057 | goto cannot_emulate; | ||
| 2058 | } | ||
| 2059 | if (test_cc(c->b, ctxt->eflags)) | 2045 | if (test_cc(c->b, ctxt->eflags)) |
| 2060 | jmp_rel(c, rel); | 2046 | jmp_rel(c, c->src.val); |
| 2061 | c->dst.type = OP_NONE; | 2047 | c->dst.type = OP_NONE; |
| 2062 | break; | 2048 | break; |
| 2063 | } | ||
| 2064 | case 0xa3: | 2049 | case 0xa3: |
| 2065 | bt: /* bt */ | 2050 | bt: /* bt */ |
| 2066 | c->dst.type = OP_NONE; | 2051 | c->dst.type = OP_NONE; |
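Most of the emulator churn above moves operand fetching out of the per-opcode handlers and into the generic decoder, which is why SrcImmUByte appears next to SrcImmByte: I/O port numbers for in/out are unsigned, so sign-extending the immediate would turn port 0x80 into a huge value. A standalone illustration of the difference (plain C, not kernel code):

#include <stdint.h>

/* SrcImmByte semantics: the fetched byte is sign-extended. */
static unsigned long fetch_simm8(uint8_t byte)
{
	return (unsigned long)(long)(int8_t)byte;	/* 0x80 -> 0xff...80 */
}

/* SrcImmUByte semantics: the fetched byte is zero-extended. */
static unsigned long fetch_uimm8(uint8_t byte)
{
	return byte;					/* 0x80 -> 0x80 */
}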
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 8cc137911b34..3db5d8d37485 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h | |||
| @@ -119,7 +119,7 @@ struct kvm_run { | |||
| 119 | __u32 error_code; | 119 | __u32 error_code; |
| 120 | } ex; | 120 | } ex; |
| 121 | /* KVM_EXIT_IO */ | 121 | /* KVM_EXIT_IO */ |
| 122 | struct kvm_io { | 122 | struct { |
| 123 | #define KVM_EXIT_IO_IN 0 | 123 | #define KVM_EXIT_IO_IN 0 |
| 124 | #define KVM_EXIT_IO_OUT 1 | 124 | #define KVM_EXIT_IO_OUT 1 |
| 125 | __u8 direction; | 125 | __u8 direction; |
| @@ -224,10 +224,10 @@ struct kvm_interrupt { | |||
| 224 | /* for KVM_GET_DIRTY_LOG */ | 224 | /* for KVM_GET_DIRTY_LOG */ |
| 225 | struct kvm_dirty_log { | 225 | struct kvm_dirty_log { |
| 226 | __u32 slot; | 226 | __u32 slot; |
| 227 | __u32 padding; | 227 | __u32 padding1; |
| 228 | union { | 228 | union { |
| 229 | void __user *dirty_bitmap; /* one bit per page */ | 229 | void __user *dirty_bitmap; /* one bit per page */ |
| 230 | __u64 padding; | 230 | __u64 padding2; |
| 231 | }; | 231 | }; |
| 232 | }; | 232 | }; |
| 233 | 233 | ||
| @@ -409,6 +409,10 @@ struct kvm_trace_rec { | |||
| 409 | #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT | 409 | #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT |
| 410 | #define KVM_CAP_DEVICE_DEASSIGNMENT 27 | 410 | #define KVM_CAP_DEVICE_DEASSIGNMENT 27 |
| 411 | #endif | 411 | #endif |
| 412 | #ifdef __KVM_HAVE_MSIX | ||
| 413 | #define KVM_CAP_DEVICE_MSIX 28 | ||
| 414 | #endif | ||
| 415 | #define KVM_CAP_ASSIGN_DEV_IRQ 29 | ||
| 412 | /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ | 416 | /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ |
| 413 | #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 | 417 | #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 |
| 414 | 418 | ||
| @@ -482,11 +486,18 @@ struct kvm_irq_routing { | |||
| 482 | #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ | 486 | #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ |
| 483 | struct kvm_assigned_pci_dev) | 487 | struct kvm_assigned_pci_dev) |
| 484 | #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) | 488 | #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) |
| 489 | /* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ | ||
| 485 | #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ | 490 | #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ |
| 486 | struct kvm_assigned_irq) | 491 | struct kvm_assigned_irq) |
| 492 | #define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) | ||
| 487 | #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) | 493 | #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) |
| 488 | #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ | 494 | #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ |
| 489 | struct kvm_assigned_pci_dev) | 495 | struct kvm_assigned_pci_dev) |
| 496 | #define KVM_ASSIGN_SET_MSIX_NR \ | ||
| 497 | _IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr) | ||
| 498 | #define KVM_ASSIGN_SET_MSIX_ENTRY \ | ||
| 499 | _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry) | ||
| 500 | #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) | ||
| 490 | 501 | ||
| 491 | /* | 502 | /* |
| 492 | * ioctls for vcpu fds | 503 | * ioctls for vcpu fds |
| @@ -577,6 +588,8 @@ struct kvm_debug_guest { | |||
| 577 | #define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) | 588 | #define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) |
| 578 | #define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) | 589 | #define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) |
| 579 | 590 | ||
| 591 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) | ||
| 592 | |||
| 580 | struct kvm_assigned_pci_dev { | 593 | struct kvm_assigned_pci_dev { |
| 581 | __u32 assigned_dev_id; | 594 | __u32 assigned_dev_id; |
| 582 | __u32 busnr; | 595 | __u32 busnr; |
| @@ -587,6 +600,17 @@ struct kvm_assigned_pci_dev { | |||
| 587 | }; | 600 | }; |
| 588 | }; | 601 | }; |
| 589 | 602 | ||
| 603 | #define KVM_DEV_IRQ_HOST_INTX (1 << 0) | ||
| 604 | #define KVM_DEV_IRQ_HOST_MSI (1 << 1) | ||
| 605 | #define KVM_DEV_IRQ_HOST_MSIX (1 << 2) | ||
| 606 | |||
| 607 | #define KVM_DEV_IRQ_GUEST_INTX (1 << 8) | ||
| 608 | #define KVM_DEV_IRQ_GUEST_MSI (1 << 9) | ||
| 609 | #define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) | ||
| 610 | |||
| 611 | #define KVM_DEV_IRQ_HOST_MASK 0x00ff | ||
| 612 | #define KVM_DEV_IRQ_GUEST_MASK 0xff00 | ||
| 613 | |||
| 590 | struct kvm_assigned_irq { | 614 | struct kvm_assigned_irq { |
| 591 | __u32 assigned_dev_id; | 615 | __u32 assigned_dev_id; |
| 592 | __u32 host_irq; | 616 | __u32 host_irq; |
| @@ -602,9 +626,19 @@ struct kvm_assigned_irq { | |||
| 602 | }; | 626 | }; |
| 603 | }; | 627 | }; |
| 604 | 628 | ||
| 605 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) | ||
| 606 | 629 | ||
| 607 | #define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI | 630 | struct kvm_assigned_msix_nr { |
| 608 | #define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) | 631 | __u32 assigned_dev_id; |
| 632 | __u16 entry_nr; | ||
| 633 | __u16 padding; | ||
| 634 | }; | ||
| 635 | |||
| 636 | #define KVM_MAX_MSIX_PER_DEV 512 | ||
| 637 | struct kvm_assigned_msix_entry { | ||
| 638 | __u32 assigned_dev_id; | ||
| 639 | __u32 gsi; | ||
| 640 | __u16 entry; /* The index of entry in the MSI-X table */ | ||
| 641 | __u16 padding[3]; | ||
| 642 | }; | ||
| 609 | 643 | ||
| 610 | #endif | 644 | #endif |
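The ABI additions above split an assigned-device interrupt into a host half and a guest half (KVM_DEV_IRQ_HOST_* / KVM_DEV_IRQ_GUEST_*) and add KVM_ASSIGN_DEV_IRQ next to the now-deprecated KVM_ASSIGN_IRQ. A hedged userspace sketch of switching an already-assigned device to MSI; vm_fd, dev_id and guest_gsi are assumptions, error handling is omitted, and leaving host_irq at zero for MSI is an assumption about this interface rather than something stated in the patch:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int assign_msi_example(int vm_fd, __u32 dev_id, __u32 guest_gsi)
{
	struct kvm_assigned_irq irq = {
		.assigned_dev_id = dev_id,
		.guest_irq	 = guest_gsi,
		/* host MSI in, guest MSI out */
		.flags		 = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_MSI,
	};

	return ioctl(vm_fd, KVM_ASSIGN_DEV_IRQ, &irq);
}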
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 894a56e365e8..aacc5449f586 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
| @@ -38,6 +38,7 @@ | |||
| 38 | #define KVM_REQ_UNHALT 6 | 38 | #define KVM_REQ_UNHALT 6 |
| 39 | #define KVM_REQ_MMU_SYNC 7 | 39 | #define KVM_REQ_MMU_SYNC 7 |
| 40 | #define KVM_REQ_KVMCLOCK_UPDATE 8 | 40 | #define KVM_REQ_KVMCLOCK_UPDATE 8 |
| 41 | #define KVM_REQ_KICK 9 | ||
| 41 | 42 | ||
| 42 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 | 43 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 |
| 43 | 44 | ||
| @@ -72,7 +73,6 @@ struct kvm_vcpu { | |||
| 72 | struct mutex mutex; | 73 | struct mutex mutex; |
| 73 | int cpu; | 74 | int cpu; |
| 74 | struct kvm_run *run; | 75 | struct kvm_run *run; |
| 75 | int guest_mode; | ||
| 76 | unsigned long requests; | 76 | unsigned long requests; |
| 77 | unsigned long guest_debug; | 77 | unsigned long guest_debug; |
| 78 | int fpu_active; | 78 | int fpu_active; |
| @@ -298,6 +298,7 @@ int kvm_arch_hardware_setup(void); | |||
| 298 | void kvm_arch_hardware_unsetup(void); | 298 | void kvm_arch_hardware_unsetup(void); |
| 299 | void kvm_arch_check_processor_compat(void *rtn); | 299 | void kvm_arch_check_processor_compat(void *rtn); |
| 300 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); | 300 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); |
| 301 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); | ||
| 301 | 302 | ||
| 302 | void kvm_free_physmem(struct kvm *kvm); | 303 | void kvm_free_physmem(struct kvm *kvm); |
| 303 | 304 | ||
| @@ -319,6 +320,13 @@ struct kvm_irq_ack_notifier { | |||
| 319 | void (*irq_acked)(struct kvm_irq_ack_notifier *kian); | 320 | void (*irq_acked)(struct kvm_irq_ack_notifier *kian); |
| 320 | }; | 321 | }; |
| 321 | 322 | ||
| 323 | #define KVM_ASSIGNED_MSIX_PENDING 0x1 | ||
| 324 | struct kvm_guest_msix_entry { | ||
| 325 | u32 vector; | ||
| 326 | u16 entry; | ||
| 327 | u16 flags; | ||
| 328 | }; | ||
| 329 | |||
| 322 | struct kvm_assigned_dev_kernel { | 330 | struct kvm_assigned_dev_kernel { |
| 323 | struct kvm_irq_ack_notifier ack_notifier; | 331 | struct kvm_irq_ack_notifier ack_notifier; |
| 324 | struct work_struct interrupt_work; | 332 | struct work_struct interrupt_work; |
| @@ -326,18 +334,18 @@ struct kvm_assigned_dev_kernel { | |||
| 326 | int assigned_dev_id; | 334 | int assigned_dev_id; |
| 327 | int host_busnr; | 335 | int host_busnr; |
| 328 | int host_devfn; | 336 | int host_devfn; |
| 337 | unsigned int entries_nr; | ||
| 329 | int host_irq; | 338 | int host_irq; |
| 330 | bool host_irq_disabled; | 339 | bool host_irq_disabled; |
| 340 | struct msix_entry *host_msix_entries; | ||
| 331 | int guest_irq; | 341 | int guest_irq; |
| 332 | #define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) | 342 | struct kvm_guest_msix_entry *guest_msix_entries; |
| 333 | #define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1) | ||
| 334 | #define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8) | ||
| 335 | #define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) | ||
| 336 | unsigned long irq_requested_type; | 343 | unsigned long irq_requested_type; |
| 337 | int irq_source_id; | 344 | int irq_source_id; |
| 338 | int flags; | 345 | int flags; |
| 339 | struct pci_dev *dev; | 346 | struct pci_dev *dev; |
| 340 | struct kvm *kvm; | 347 | struct kvm *kvm; |
| 348 | spinlock_t assigned_dev_lock; | ||
| 341 | }; | 349 | }; |
| 342 | 350 | ||
| 343 | struct kvm_irq_mask_notifier { | 351 | struct kvm_irq_mask_notifier { |
| @@ -360,6 +368,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); | |||
| 360 | int kvm_request_irq_source_id(struct kvm *kvm); | 368 | int kvm_request_irq_source_id(struct kvm *kvm); |
| 361 | void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); | 369 | void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); |
| 362 | 370 | ||
| 371 | /* For vcpu->arch.iommu_flags */ | ||
| 372 | #define KVM_IOMMU_CACHE_COHERENCY 0x1 | ||
| 373 | |||
| 363 | #ifdef CONFIG_IOMMU_API | 374 | #ifdef CONFIG_IOMMU_API |
| 364 | int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, | 375 | int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, |
| 365 | unsigned long npages); | 376 | unsigned long npages); |
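kvm_assigned_dev_kernel now carries parallel host and guest MSI-X tables (host_msix_entries and guest_msix_entries, both entries_nr long) plus a per-device assigned_dev_lock. Purely as a sketch of how the two arrays are meant to line up (locking elided, function name hypothetical), a dispatch path could map a firing host vector back to the guest vector registered for the same MSI-X entry:

static u32 guest_vector_for_host_irq(struct kvm_assigned_dev_kernel *dev,
				     int host_irq)
{
	unsigned int i;

	for (i = 0; i < dev->entries_nr; i++)
		if (dev->host_msix_entries[i].vector == host_irq)
			return dev->guest_msix_entries[i].vector;

	return 0;	/* no MSI-X entry uses this host irq */
}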
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 2b8318c83e53..fb46efbeabec 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h | |||
| @@ -40,4 +40,31 @@ typedef unsigned long hfn_t; | |||
| 40 | 40 | ||
| 41 | typedef hfn_t pfn_t; | 41 | typedef hfn_t pfn_t; |
| 42 | 42 | ||
| 43 | union kvm_ioapic_redirect_entry { | ||
| 44 | u64 bits; | ||
| 45 | struct { | ||
| 46 | u8 vector; | ||
| 47 | u8 delivery_mode:3; | ||
| 48 | u8 dest_mode:1; | ||
| 49 | u8 delivery_status:1; | ||
| 50 | u8 polarity:1; | ||
| 51 | u8 remote_irr:1; | ||
| 52 | u8 trig_mode:1; | ||
| 53 | u8 mask:1; | ||
| 54 | u8 reserve:7; | ||
| 55 | u8 reserved[4]; | ||
| 56 | u8 dest_id; | ||
| 57 | } fields; | ||
| 58 | }; | ||
| 59 | |||
| 60 | struct kvm_lapic_irq { | ||
| 61 | u32 vector; | ||
| 62 | u32 delivery_mode; | ||
| 63 | u32 dest_mode; | ||
| 64 | u32 level; | ||
| 65 | u32 trig_mode; | ||
| 66 | u32 shorthand; | ||
| 67 | u32 dest_id; | ||
| 68 | }; | ||
| 69 | |||
| 43 | #endif /* __KVM_TYPES_H__ */ | 70 | #endif /* __KVM_TYPES_H__ */ |
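union kvm_ioapic_redirect_entry overlays the raw 64-bit IOAPIC redirection register with named bitfields, and struct kvm_lapic_irq is the arch-neutral message that ioapic_deliver() below builds from such an entry. A tiny illustrative sketch of the union's intent (the helper is hypothetical):

static u8 redirect_entry_vector_example(u64 raw)
{
	union kvm_ioapic_redirect_entry e;

	/* Guests program the register through the indirect IOAPIC window
	 * as a raw 64-bit value; the bitfields give named access to it. */
	e.bits = raw;
	return e.fields.vector;
}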
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index c3b99def9cbc..1eddae94bab3 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c | |||
| @@ -85,7 +85,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, | |||
| 85 | 85 | ||
| 86 | static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) | 86 | static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) |
| 87 | { | 87 | { |
| 88 | union ioapic_redir_entry *pent; | 88 | union kvm_ioapic_redirect_entry *pent; |
| 89 | int injected = -1; | 89 | int injected = -1; |
| 90 | 90 | ||
| 91 | pent = &ioapic->redirtbl[idx]; | 91 | pent = &ioapic->redirtbl[idx]; |
| @@ -142,149 +142,40 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | |||
| 142 | } | 142 | } |
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | static int ioapic_inj_irq(struct kvm_ioapic *ioapic, | ||
| 146 | struct kvm_vcpu *vcpu, | ||
| 147 | u8 vector, u8 trig_mode, u8 delivery_mode) | ||
| 148 | { | ||
| 149 | ioapic_debug("irq %d trig %d deliv %d\n", vector, trig_mode, | ||
| 150 | delivery_mode); | ||
| 151 | |||
| 152 | ASSERT((delivery_mode == IOAPIC_FIXED) || | ||
| 153 | (delivery_mode == IOAPIC_LOWEST_PRIORITY)); | ||
| 154 | |||
| 155 | return kvm_apic_set_irq(vcpu, vector, trig_mode); | ||
| 156 | } | ||
| 157 | |||
| 158 | static void ioapic_inj_nmi(struct kvm_vcpu *vcpu) | ||
| 159 | { | ||
| 160 | kvm_inject_nmi(vcpu); | ||
| 161 | kvm_vcpu_kick(vcpu); | ||
| 162 | } | ||
| 163 | |||
| 164 | u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, | ||
| 165 | u8 dest_mode) | ||
| 166 | { | ||
| 167 | u32 mask = 0; | ||
| 168 | int i; | ||
| 169 | struct kvm *kvm = ioapic->kvm; | ||
| 170 | struct kvm_vcpu *vcpu; | ||
| 171 | |||
| 172 | ioapic_debug("dest %d dest_mode %d\n", dest, dest_mode); | ||
| 173 | |||
| 174 | if (dest_mode == 0) { /* Physical mode. */ | ||
| 175 | if (dest == 0xFF) { /* Broadcast. */ | ||
| 176 | for (i = 0; i < KVM_MAX_VCPUS; ++i) | ||
| 177 | if (kvm->vcpus[i] && kvm->vcpus[i]->arch.apic) | ||
| 178 | mask |= 1 << i; | ||
| 179 | return mask; | ||
| 180 | } | ||
| 181 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | ||
| 182 | vcpu = kvm->vcpus[i]; | ||
| 183 | if (!vcpu) | ||
| 184 | continue; | ||
| 185 | if (kvm_apic_match_physical_addr(vcpu->arch.apic, dest)) { | ||
| 186 | if (vcpu->arch.apic) | ||
| 187 | mask = 1 << i; | ||
| 188 | break; | ||
| 189 | } | ||
| 190 | } | ||
| 191 | } else if (dest != 0) /* Logical mode, MDA non-zero. */ | ||
| 192 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | ||
| 193 | vcpu = kvm->vcpus[i]; | ||
| 194 | if (!vcpu) | ||
| 195 | continue; | ||
| 196 | if (vcpu->arch.apic && | ||
| 197 | kvm_apic_match_logical_addr(vcpu->arch.apic, dest)) | ||
| 198 | mask |= 1 << vcpu->vcpu_id; | ||
| 199 | } | ||
| 200 | ioapic_debug("mask %x\n", mask); | ||
| 201 | return mask; | ||
| 202 | } | ||
| 203 | |||
| 204 | static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) | 145 | static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) |
| 205 | { | 146 | { |
| 206 | u8 dest = ioapic->redirtbl[irq].fields.dest_id; | 147 | union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; |
| 207 | u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode; | 148 | struct kvm_lapic_irq irqe; |
| 208 | u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode; | ||
| 209 | u8 vector = ioapic->redirtbl[irq].fields.vector; | ||
| 210 | u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode; | ||
| 211 | u32 deliver_bitmask; | ||
| 212 | struct kvm_vcpu *vcpu; | ||
| 213 | int vcpu_id, r = -1; | ||
| 214 | 149 | ||
| 215 | ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " | 150 | ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " |
| 216 | "vector=%x trig_mode=%x\n", | 151 | "vector=%x trig_mode=%x\n", |
| 217 | dest, dest_mode, delivery_mode, vector, trig_mode); | 152 | entry->fields.dest, entry->fields.dest_mode, |
| 218 | 153 | entry->fields.delivery_mode, entry->fields.vector, | |
| 219 | deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest, | 154 | entry->fields.trig_mode); |
| 220 | dest_mode); | 155 | |
| 221 | if (!deliver_bitmask) { | 156 | irqe.dest_id = entry->fields.dest_id; |
| 222 | ioapic_debug("no target on destination\n"); | 157 | irqe.vector = entry->fields.vector; |
| 223 | return 0; | 158 | irqe.dest_mode = entry->fields.dest_mode; |
| 224 | } | 159 | irqe.trig_mode = entry->fields.trig_mode; |
| 160 | irqe.delivery_mode = entry->fields.delivery_mode << 8; | ||
| 161 | irqe.level = 1; | ||
| 162 | irqe.shorthand = 0; | ||
| 225 | 163 | ||
| 226 | switch (delivery_mode) { | ||
| 227 | case IOAPIC_LOWEST_PRIORITY: | ||
| 228 | vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, | ||
| 229 | deliver_bitmask); | ||
| 230 | #ifdef CONFIG_X86 | 164 | #ifdef CONFIG_X86 |
| 231 | if (irq == 0) | 165 | /* Always deliver the PIT interrupt to vcpu 0 */ |
| 232 | vcpu = ioapic->kvm->vcpus[0]; | 166 | if (irq == 0) { |
| 233 | #endif | 167 | irqe.dest_mode = 0; /* Physical mode. */ |
| 234 | if (vcpu != NULL) | 168 | irqe.dest_id = ioapic->kvm->vcpus[0]->vcpu_id; |
| 235 | r = ioapic_inj_irq(ioapic, vcpu, vector, | ||
| 236 | trig_mode, delivery_mode); | ||
| 237 | else | ||
| 238 | ioapic_debug("null lowest prio vcpu: " | ||
| 239 | "mask=%x vector=%x delivery_mode=%x\n", | ||
| 240 | deliver_bitmask, vector, IOAPIC_LOWEST_PRIORITY); | ||
| 241 | break; | ||
| 242 | case IOAPIC_FIXED: | ||
| 243 | #ifdef CONFIG_X86 | ||
| 244 | if (irq == 0) | ||
| 245 | deliver_bitmask = 1; | ||
| 246 | #endif | ||
| 247 | for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { | ||
| 248 | if (!(deliver_bitmask & (1 << vcpu_id))) | ||
| 249 | continue; | ||
| 250 | deliver_bitmask &= ~(1 << vcpu_id); | ||
| 251 | vcpu = ioapic->kvm->vcpus[vcpu_id]; | ||
| 252 | if (vcpu) { | ||
| 253 | if (r < 0) | ||
| 254 | r = 0; | ||
| 255 | r += ioapic_inj_irq(ioapic, vcpu, vector, | ||
| 256 | trig_mode, delivery_mode); | ||
| 257 | } | ||
| 258 | } | ||
| 259 | break; | ||
| 260 | case IOAPIC_NMI: | ||
| 261 | for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { | ||
| 262 | if (!(deliver_bitmask & (1 << vcpu_id))) | ||
| 263 | continue; | ||
| 264 | deliver_bitmask &= ~(1 << vcpu_id); | ||
| 265 | vcpu = ioapic->kvm->vcpus[vcpu_id]; | ||
| 266 | if (vcpu) { | ||
| 267 | ioapic_inj_nmi(vcpu); | ||
| 268 | r = 1; | ||
| 269 | } | ||
| 270 | else | ||
| 271 | ioapic_debug("NMI to vcpu %d failed\n", | ||
| 272 | vcpu->vcpu_id); | ||
| 273 | } | ||
| 274 | break; | ||
| 275 | default: | ||
| 276 | printk(KERN_WARNING "Unsupported delivery mode %d\n", | ||
| 277 | delivery_mode); | ||
| 278 | break; | ||
| 279 | } | 169 | } |
| 280 | return r; | 170 | #endif |
| 171 | return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); | ||
| 281 | } | 172 | } |
| 282 | 173 | ||
| 283 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) | 174 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) |
| 284 | { | 175 | { |
| 285 | u32 old_irr = ioapic->irr; | 176 | u32 old_irr = ioapic->irr; |
| 286 | u32 mask = 1 << irq; | 177 | u32 mask = 1 << irq; |
| 287 | union ioapic_redir_entry entry; | 178 | union kvm_ioapic_redirect_entry entry; |
| 288 | int ret = 1; | 179 | int ret = 1; |
| 289 | 180 | ||
| 290 | if (irq >= 0 && irq < IOAPIC_NUM_PINS) { | 181 | if (irq >= 0 && irq < IOAPIC_NUM_PINS) { |
| @@ -305,7 +196,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) | |||
| 305 | static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin, | 196 | static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin, |
| 306 | int trigger_mode) | 197 | int trigger_mode) |
| 307 | { | 198 | { |
| 308 | union ioapic_redir_entry *ent; | 199 | union kvm_ioapic_redirect_entry *ent; |
| 309 | 200 | ||
| 310 | ent = &ioapic->redirtbl[pin]; | 201 | ent = &ioapic->redirtbl[pin]; |
| 311 | 202 | ||
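ioapic_deliver() above no longer walks a delivery bitmask itself; it packs the redirection entry into a kvm_lapic_irq and hands it to kvm_irq_delivery_to_apic(). The "<< 8" exists because the IOAPIC field carries the raw 3-bit mode while the LAPIC code compares against the APIC_DM_* ICR encodings, which sit in bits 8-10. A stand-alone sketch follows; the constants reproduce the x86 values and the sample mode is invented.

#include <stdio.h>

#define APIC_DM_FIXED   0x000
#define APIC_DM_LOWEST  0x100

int main(void)
{
	unsigned int ioapic_mode = 1;               /* lowest priority in the RTE */
	unsigned int lapic_mode  = ioapic_mode << 8;

	printf("RTE mode %u -> ICR mode %#x (lowest=%#x, fixed=%#x)\n",
	       ioapic_mode, lapic_mode, APIC_DM_LOWEST, APIC_DM_FIXED);
	return 0;
}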
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index a34bd5e6436b..7080b713c160 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h | |||
| @@ -40,22 +40,7 @@ struct kvm_ioapic { | |||
| 40 | u32 id; | 40 | u32 id; |
| 41 | u32 irr; | 41 | u32 irr; |
| 42 | u32 pad; | 42 | u32 pad; |
| 43 | union ioapic_redir_entry { | 43 | union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; |
| 44 | u64 bits; | ||
| 45 | struct { | ||
| 46 | u8 vector; | ||
| 47 | u8 delivery_mode:3; | ||
| 48 | u8 dest_mode:1; | ||
| 49 | u8 delivery_status:1; | ||
| 50 | u8 polarity:1; | ||
| 51 | u8 remote_irr:1; | ||
| 52 | u8 trig_mode:1; | ||
| 53 | u8 mask:1; | ||
| 54 | u8 reserve:7; | ||
| 55 | u8 reserved[4]; | ||
| 56 | u8 dest_id; | ||
| 57 | } fields; | ||
| 58 | } redirtbl[IOAPIC_NUM_PINS]; | ||
| 59 | struct kvm_io_device dev; | 44 | struct kvm_io_device dev; |
| 60 | struct kvm *kvm; | 45 | struct kvm *kvm; |
| 61 | void (*ack_notifier)(void *opaque, int irq); | 46 | void (*ack_notifier)(void *opaque, int irq); |
| @@ -79,13 +64,13 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) | |||
| 79 | return kvm->arch.vioapic; | 64 | return kvm->arch.vioapic; |
| 80 | } | 65 | } |
| 81 | 66 | ||
| 82 | struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, | 67 | int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, |
| 83 | unsigned long bitmap); | 68 | int short_hand, int dest, int dest_mode); |
| 69 | int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); | ||
| 84 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); | 70 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); |
| 85 | int kvm_ioapic_init(struct kvm *kvm); | 71 | int kvm_ioapic_init(struct kvm *kvm); |
| 86 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); | 72 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); |
| 87 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); | 73 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); |
| 88 | u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, | 74 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, |
| 89 | u8 dest_mode); | 75 | struct kvm_lapic_irq *irq); |
| 90 | |||
| 91 | #endif | 76 | #endif |
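The header now exports kvm_apic_match_dest() and kvm_apic_compare_prio() instead of the per-IOAPIC bitmask helper, so lowest-priority arbitration becomes a simple "keep the lowest" scan over the matching vcpus. A toy model of that scan, with invented vcpu ids and priorities, is shown below; it illustrates the idea rather than the kernel's code.

#include <stdio.h>

struct vcpu_example { int id; int ppr; };   /* ppr: processor priority */

static const struct vcpu_example *pick_lowest(const struct vcpu_example *v, int n)
{
	const struct vcpu_example *lowest = NULL;

	for (int i = 0; i < n; i++) {
		if (!lowest || v[i].ppr < lowest->ppr)
			lowest = &v[i];
	}
	return lowest;
}

int main(void)
{
	struct vcpu_example vcpus[] = { {0, 0x50}, {1, 0x20}, {2, 0x80} };

	printf("deliver to vcpu %d\n", pick_lowest(vcpus, 3)->id);
	return 0;
}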
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 4c4037503600..15147583abd1 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c | |||
| @@ -39,11 +39,16 @@ int kvm_iommu_map_pages(struct kvm *kvm, | |||
| 39 | pfn_t pfn; | 39 | pfn_t pfn; |
| 40 | int i, r = 0; | 40 | int i, r = 0; |
| 41 | struct iommu_domain *domain = kvm->arch.iommu_domain; | 41 | struct iommu_domain *domain = kvm->arch.iommu_domain; |
| 42 | int flags; | ||
| 42 | 43 | ||
| 43 | /* check if iommu exists and in use */ | 44 | /* check if iommu exists and in use */ |
| 44 | if (!domain) | 45 | if (!domain) |
| 45 | return 0; | 46 | return 0; |
| 46 | 47 | ||
| 48 | flags = IOMMU_READ | IOMMU_WRITE; | ||
| 49 | if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY) | ||
| 50 | flags |= IOMMU_CACHE; | ||
| 51 | |||
| 47 | for (i = 0; i < npages; i++) { | 52 | for (i = 0; i < npages; i++) { |
| 48 | /* check if already mapped */ | 53 | /* check if already mapped */ |
| 49 | if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) | 54 | if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) |
| @@ -53,8 +58,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, | |||
| 53 | r = iommu_map_range(domain, | 58 | r = iommu_map_range(domain, |
| 54 | gfn_to_gpa(gfn), | 59 | gfn_to_gpa(gfn), |
| 55 | pfn_to_hpa(pfn), | 60 | pfn_to_hpa(pfn), |
| 56 | PAGE_SIZE, | 61 | PAGE_SIZE, flags); |
| 57 | IOMMU_READ | IOMMU_WRITE); | ||
| 58 | if (r) { | 62 | if (r) { |
| 59 | printk(KERN_ERR "kvm_iommu_map_address:" | 63 | printk(KERN_ERR "kvm_iommu_map_address:" |
| 60 | "iommu failed to map pfn=%lx\n", pfn); | 64 | "iommu failed to map pfn=%lx\n", pfn); |
| @@ -88,7 +92,7 @@ int kvm_assign_device(struct kvm *kvm, | |||
| 88 | { | 92 | { |
| 89 | struct pci_dev *pdev = NULL; | 93 | struct pci_dev *pdev = NULL; |
| 90 | struct iommu_domain *domain = kvm->arch.iommu_domain; | 94 | struct iommu_domain *domain = kvm->arch.iommu_domain; |
| 91 | int r; | 95 | int r, last_flags; |
| 92 | 96 | ||
| 93 | /* check if iommu exists and in use */ | 97 | /* check if iommu exists and in use */ |
| 94 | if (!domain) | 98 | if (!domain) |
| @@ -107,12 +111,29 @@ int kvm_assign_device(struct kvm *kvm, | |||
| 107 | return r; | 111 | return r; |
| 108 | } | 112 | } |
| 109 | 113 | ||
| 114 | last_flags = kvm->arch.iommu_flags; | ||
| 115 | if (iommu_domain_has_cap(kvm->arch.iommu_domain, | ||
| 116 | IOMMU_CAP_CACHE_COHERENCY)) | ||
| 117 | kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY; | ||
| 118 | |||
| 119 | /* Check if need to update IOMMU page table for guest memory */ | ||
| 120 | if ((last_flags ^ kvm->arch.iommu_flags) == | ||
| 121 | KVM_IOMMU_CACHE_COHERENCY) { | ||
| 122 | kvm_iommu_unmap_memslots(kvm); | ||
| 123 | r = kvm_iommu_map_memslots(kvm); | ||
| 124 | if (r) | ||
| 125 | goto out_unmap; | ||
| 126 | } | ||
| 127 | |||
| 110 | printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", | 128 | printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", |
| 111 | assigned_dev->host_busnr, | 129 | assigned_dev->host_busnr, |
| 112 | PCI_SLOT(assigned_dev->host_devfn), | 130 | PCI_SLOT(assigned_dev->host_devfn), |
| 113 | PCI_FUNC(assigned_dev->host_devfn)); | 131 | PCI_FUNC(assigned_dev->host_devfn)); |
| 114 | 132 | ||
| 115 | return 0; | 133 | return 0; |
| 134 | out_unmap: | ||
| 135 | kvm_iommu_unmap_memslots(kvm); | ||
| 136 | return r; | ||
| 116 | } | 137 | } |
| 117 | 138 | ||
| 118 | int kvm_deassign_device(struct kvm *kvm, | 139 | int kvm_deassign_device(struct kvm *kvm, |
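kvm_assign_device() above only rebuilds the IOMMU mappings when KVM_IOMMU_CACHE_COHERENCY is exactly the bit that flipped, which the XOR test captures. A toy illustration of that transition test follows; the flag value is a stand-in and the helper name is hypothetical.

#include <stdio.h>

#define KVM_IOMMU_CACHE_COHERENCY 0x1

static int needs_remap(int last_flags, int new_flags)
{
	/* remap only when the coherency bit is the single change */
	return (last_flags ^ new_flags) == KVM_IOMMU_CACHE_COHERENCY;
}

int main(void)
{
	printf("0 -> coherent       : remap=%d\n",
	       needs_remap(0, KVM_IOMMU_CACHE_COHERENCY));
	printf("coherent -> coherent: remap=%d\n",
	       needs_remap(KVM_IOMMU_CACHE_COHERENCY, KVM_IOMMU_CACHE_COHERENCY));
	return 0;
}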
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 864ac5483baa..a8bd466d00cc 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c | |||
| @@ -22,6 +22,9 @@ | |||
| 22 | #include <linux/kvm_host.h> | 22 | #include <linux/kvm_host.h> |
| 23 | 23 | ||
| 24 | #include <asm/msidef.h> | 24 | #include <asm/msidef.h> |
| 25 | #ifdef CONFIG_IA64 | ||
| 26 | #include <asm/iosapic.h> | ||
| 27 | #endif | ||
| 25 | 28 | ||
| 26 | #include "irq.h" | 29 | #include "irq.h" |
| 27 | 30 | ||
| @@ -43,57 +46,73 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, | |||
| 43 | return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); | 46 | return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); |
| 44 | } | 47 | } |
| 45 | 48 | ||
| 46 | static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | 49 | static inline bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) |
| 47 | struct kvm *kvm, int level) | ||
| 48 | { | 50 | { |
| 49 | int vcpu_id, r = -1; | 51 | #ifdef CONFIG_IA64 |
| 50 | struct kvm_vcpu *vcpu; | 52 | return irq->delivery_mode == |
| 51 | struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); | 53 | (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); |
| 52 | int dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK) | 54 | #else |
| 53 | >> MSI_ADDR_DEST_ID_SHIFT; | 55 | return irq->delivery_mode == APIC_DM_LOWEST; |
| 54 | int vector = (e->msi.data & MSI_DATA_VECTOR_MASK) | 56 | #endif |
| 55 | >> MSI_DATA_VECTOR_SHIFT; | 57 | } |
| 56 | int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT, | 58 | |
| 57 | (unsigned long *)&e->msi.address_lo); | 59 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, |
| 58 | int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT, | 60 | struct kvm_lapic_irq *irq) |
| 59 | (unsigned long *)&e->msi.data); | 61 | { |
| 60 | int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT, | 62 | int i, r = -1; |
| 61 | (unsigned long *)&e->msi.data); | 63 | struct kvm_vcpu *vcpu, *lowest = NULL; |
| 62 | u32 deliver_bitmask; | 64 | |
| 63 | 65 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && | |
| 64 | BUG_ON(!ioapic); | 66 | kvm_is_dm_lowest_prio(irq)) |
| 65 | 67 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); | |
| 66 | deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, | 68 | |
| 67 | dest_id, dest_mode); | 69 | for (i = 0; i < KVM_MAX_VCPUS; i++) { |
| 68 | /* IOAPIC delivery mode value is the same as MSI here */ | 70 | vcpu = kvm->vcpus[i]; |
| 69 | switch (delivery_mode) { | 71 | |
| 70 | case IOAPIC_LOWEST_PRIORITY: | 72 | if (!vcpu || !kvm_apic_present(vcpu)) |
| 71 | vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, | 73 | continue; |
| 72 | deliver_bitmask); | 74 | |
| 73 | if (vcpu != NULL) | 75 | if (!kvm_apic_match_dest(vcpu, src, irq->shorthand, |
| 74 | r = kvm_apic_set_irq(vcpu, vector, trig_mode); | 76 | irq->dest_id, irq->dest_mode)) |
| 75 | else | 77 | continue; |
| 76 | printk(KERN_INFO "kvm: null lowest priority vcpu!\n"); | 78 | |
| 77 | break; | 79 | if (!kvm_is_dm_lowest_prio(irq)) { |
| 78 | case IOAPIC_FIXED: | 80 | if (r < 0) |
| 79 | for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { | 81 | r = 0; |
| 80 | if (!(deliver_bitmask & (1 << vcpu_id))) | 82 | r += kvm_apic_set_irq(vcpu, irq); |
| 81 | continue; | 83 | } else { |
| 82 | deliver_bitmask &= ~(1 << vcpu_id); | 84 | if (!lowest) |
| 83 | vcpu = ioapic->kvm->vcpus[vcpu_id]; | 85 | lowest = vcpu; |
| 84 | if (vcpu) { | 86 | else if (kvm_apic_compare_prio(vcpu, lowest) < 0) |
| 85 | if (r < 0) | 87 | lowest = vcpu; |
| 86 | r = 0; | ||
| 87 | r += kvm_apic_set_irq(vcpu, vector, trig_mode); | ||
| 88 | } | ||
| 89 | } | 88 | } |
| 90 | break; | ||
| 91 | default: | ||
| 92 | break; | ||
| 93 | } | 89 | } |
| 90 | |||
| 91 | if (lowest) | ||
| 92 | r = kvm_apic_set_irq(lowest, irq); | ||
| 93 | |||
| 94 | return r; | 94 | return r; |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | ||
| 98 | struct kvm *kvm, int level) | ||
| 99 | { | ||
| 100 | struct kvm_lapic_irq irq; | ||
| 101 | |||
| 102 | irq.dest_id = (e->msi.address_lo & | ||
| 103 | MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; | ||
| 104 | irq.vector = (e->msi.data & | ||
| 105 | MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; | ||
| 106 | irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; | ||
| 107 | irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data; | ||
| 108 | irq.delivery_mode = e->msi.data & 0x700; | ||
| 109 | irq.level = 1; | ||
| 110 | irq.shorthand = 0; | ||
| 111 | |||
| 112 | /* TODO Deal with RH bit of MSI message address */ | ||
| 113 | return kvm_irq_delivery_to_apic(kvm, NULL, &irq); | ||
| 114 | } | ||
| 115 | |||
| 97 | /* This should be called with the kvm->lock mutex held | 116 | /* This should be called with the kvm->lock mutex held |
| 98 | * Return value: | 117 | * Return value: |
| 99 | * < 0 Interrupt was ignored (masked or not delivered for other reasons) | 118 | * < 0 Interrupt was ignored (masked or not delivered for other reasons) |
| @@ -252,7 +271,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, | |||
| 252 | delta = 8; | 271 | delta = 8; |
| 253 | break; | 272 | break; |
| 254 | case KVM_IRQCHIP_IOAPIC: | 273 | case KVM_IRQCHIP_IOAPIC: |
| 255 | e->set = kvm_set_ioapic_irq; | 274 | e->set = kvm_set_ioapic_irq; |
| 256 | break; | 275 | break; |
| 257 | default: | 276 | default: |
| 258 | goto out; | 277 | goto out; |
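kvm_set_msi() above now decodes the MSI address/data words straight into a kvm_lapic_irq. The sketch below performs the same extraction in user space; the mask and shift names are local stand-ins mirroring the x86 MSI message format (destination ID in address bits 19:12, vector in data bits 7:0, delivery mode in data bits 10:8), and the sample address/data values are invented.

#include <stdio.h>
#include <stdint.h>

#define EX_ADDR_DEST_ID_MASK   0x000ff000u   /* address bits 19:12 */
#define EX_ADDR_DEST_ID_SHIFT  12
#define EX_ADDR_DEST_MODE_BIT  (1u << 2)     /* 0 = physical, 1 = logical */
#define EX_DATA_VECTOR_MASK    0x000000ffu
#define EX_DATA_TRIGGER_BIT    (1u << 15)    /* 0 = edge, 1 = level */
#define EX_DATA_DELIVERY_MASK  0x00000700u   /* bits 10:8 */

int main(void)
{
	uint32_t address_lo = 0xfee01000u;   /* dest id 0x01, physical */
	uint32_t data       = 0x00000041u;   /* fixed delivery, vector 0x41 */

	printf("dest_id=%#x vector=%#x dest_mode=%s trig=%s delivery=%#x\n",
	       (address_lo & EX_ADDR_DEST_ID_MASK) >> EX_ADDR_DEST_ID_SHIFT,
	       data & EX_DATA_VECTOR_MASK,
	       (address_lo & EX_ADDR_DEST_MODE_BIT) ? "logical" : "physical",
	       (data & EX_DATA_TRIGGER_BIT) ? "level" : "edge",
	       data & EX_DATA_DELIVERY_MASK);
	return 0;
}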
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4d0dd390aa50..e21194566b71 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
| @@ -41,6 +41,8 @@ | |||
| 41 | #include <linux/pagemap.h> | 41 | #include <linux/pagemap.h> |
| 42 | #include <linux/mman.h> | 42 | #include <linux/mman.h> |
| 43 | #include <linux/swap.h> | 43 | #include <linux/swap.h> |
| 44 | #include <linux/bitops.h> | ||
| 45 | #include <linux/spinlock.h> | ||
| 44 | 46 | ||
| 45 | #include <asm/processor.h> | 47 | #include <asm/processor.h> |
| 46 | #include <asm/io.h> | 48 | #include <asm/io.h> |
| @@ -60,9 +62,6 @@ | |||
| 60 | MODULE_AUTHOR("Qumranet"); | 62 | MODULE_AUTHOR("Qumranet"); |
| 61 | MODULE_LICENSE("GPL"); | 63 | MODULE_LICENSE("GPL"); |
| 62 | 64 | ||
| 63 | static int msi2intx = 1; | ||
| 64 | module_param(msi2intx, bool, 0); | ||
| 65 | |||
| 66 | DEFINE_SPINLOCK(kvm_lock); | 65 | DEFINE_SPINLOCK(kvm_lock); |
| 67 | LIST_HEAD(vm_list); | 66 | LIST_HEAD(vm_list); |
| 68 | 67 | ||
| @@ -95,38 +94,96 @@ static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *h | |||
| 95 | return NULL; | 94 | return NULL; |
| 96 | } | 95 | } |
| 97 | 96 | ||
| 97 | static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | ||
| 98 | *assigned_dev, int irq) | ||
| 99 | { | ||
| 100 | int i, index; | ||
| 101 | struct msix_entry *host_msix_entries; | ||
| 102 | |||
| 103 | host_msix_entries = assigned_dev->host_msix_entries; | ||
| 104 | |||
| 105 | index = -1; | ||
| 106 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
| 107 | if (irq == host_msix_entries[i].vector) { | ||
| 108 | index = i; | ||
| 109 | break; | ||
| 110 | } | ||
| 111 | if (index < 0) { | ||
| 112 | printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); | ||
| 113 | return 0; | ||
| 114 | } | ||
| 115 | |||
| 116 | return index; | ||
| 117 | } | ||
| 118 | |||
| 98 | static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) | 119 | static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) |
| 99 | { | 120 | { |
| 100 | struct kvm_assigned_dev_kernel *assigned_dev; | 121 | struct kvm_assigned_dev_kernel *assigned_dev; |
| 122 | struct kvm *kvm; | ||
| 123 | int irq, i; | ||
| 101 | 124 | ||
| 102 | assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, | 125 | assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, |
| 103 | interrupt_work); | 126 | interrupt_work); |
| 127 | kvm = assigned_dev->kvm; | ||
| 104 | 128 | ||
| 105 | /* This is taken to safely inject irq inside the guest. When | 129 | /* This is taken to safely inject irq inside the guest. When |
| 106 | * the interrupt injection (or the ioapic code) uses a | 130 | * the interrupt injection (or the ioapic code) uses a |
| 107 | * finer-grained lock, update this | 131 | * finer-grained lock, update this |
| 108 | */ | 132 | */ |
| 109 | mutex_lock(&assigned_dev->kvm->lock); | 133 | mutex_lock(&kvm->lock); |
| 110 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | 134 | spin_lock_irq(&assigned_dev->assigned_dev_lock); |
| 111 | assigned_dev->guest_irq, 1); | 135 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { |
| 112 | 136 | struct kvm_guest_msix_entry *guest_entries = | |
| 113 | if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) { | 137 | assigned_dev->guest_msix_entries; |
| 114 | enable_irq(assigned_dev->host_irq); | 138 | for (i = 0; i < assigned_dev->entries_nr; i++) { |
| 115 | assigned_dev->host_irq_disabled = false; | 139 | if (!(guest_entries[i].flags & |
| 140 | KVM_ASSIGNED_MSIX_PENDING)) | ||
| 141 | continue; | ||
| 142 | guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; | ||
| 143 | kvm_set_irq(assigned_dev->kvm, | ||
| 144 | assigned_dev->irq_source_id, | ||
| 145 | guest_entries[i].vector, 1); | ||
| 146 | irq = assigned_dev->host_msix_entries[i].vector; | ||
| 147 | if (irq != 0) | ||
| 148 | enable_irq(irq); | ||
| 149 | assigned_dev->host_irq_disabled = false; | ||
| 150 | } | ||
| 151 | } else { | ||
| 152 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | ||
| 153 | assigned_dev->guest_irq, 1); | ||
| 154 | if (assigned_dev->irq_requested_type & | ||
| 155 | KVM_DEV_IRQ_GUEST_MSI) { | ||
| 156 | enable_irq(assigned_dev->host_irq); | ||
| 157 | assigned_dev->host_irq_disabled = false; | ||
| 158 | } | ||
| 116 | } | 159 | } |
| 160 | |||
| 161 | spin_unlock_irq(&assigned_dev->assigned_dev_lock); | ||
| 117 | mutex_unlock(&assigned_dev->kvm->lock); | 162 | mutex_unlock(&assigned_dev->kvm->lock); |
| 118 | } | 163 | } |
| 119 | 164 | ||
| 120 | static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) | 165 | static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) |
| 121 | { | 166 | { |
| 167 | unsigned long flags; | ||
| 122 | struct kvm_assigned_dev_kernel *assigned_dev = | 168 | struct kvm_assigned_dev_kernel *assigned_dev = |
| 123 | (struct kvm_assigned_dev_kernel *) dev_id; | 169 | (struct kvm_assigned_dev_kernel *) dev_id; |
| 124 | 170 | ||
| 171 | spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); | ||
| 172 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
| 173 | int index = find_index_from_host_irq(assigned_dev, irq); | ||
| 174 | if (index < 0) | ||
| 175 | goto out; | ||
| 176 | assigned_dev->guest_msix_entries[index].flags |= | ||
| 177 | KVM_ASSIGNED_MSIX_PENDING; | ||
| 178 | } | ||
| 179 | |||
| 125 | schedule_work(&assigned_dev->interrupt_work); | 180 | schedule_work(&assigned_dev->interrupt_work); |
| 126 | 181 | ||
| 127 | disable_irq_nosync(irq); | 182 | disable_irq_nosync(irq); |
| 128 | assigned_dev->host_irq_disabled = true; | 183 | assigned_dev->host_irq_disabled = true; |
| 129 | 184 | ||
| 185 | out: | ||
| 186 | spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); | ||
| 130 | return IRQ_HANDLED; | 187 | return IRQ_HANDLED; |
| 131 | } | 188 | } |
| 132 | 189 | ||
| @@ -134,6 +191,7 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) | |||
| 134 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | 191 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) |
| 135 | { | 192 | { |
| 136 | struct kvm_assigned_dev_kernel *dev; | 193 | struct kvm_assigned_dev_kernel *dev; |
| 194 | unsigned long flags; | ||
| 137 | 195 | ||
| 138 | if (kian->gsi == -1) | 196 | if (kian->gsi == -1) |
| 139 | return; | 197 | return; |
| @@ -146,28 +204,30 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
| 146 | /* The guest irq may be shared so this ack may be | 204 | /* The guest irq may be shared so this ack may be |
| 147 | * from another device. | 205 | * from another device. |
| 148 | */ | 206 | */ |
| 207 | spin_lock_irqsave(&dev->assigned_dev_lock, flags); | ||
| 149 | if (dev->host_irq_disabled) { | 208 | if (dev->host_irq_disabled) { |
| 150 | enable_irq(dev->host_irq); | 209 | enable_irq(dev->host_irq); |
| 151 | dev->host_irq_disabled = false; | 210 | dev->host_irq_disabled = false; |
| 152 | } | 211 | } |
| 212 | spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); | ||
| 153 | } | 213 | } |
| 154 | 214 | ||
| 155 | /* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ | 215 | static void deassign_guest_irq(struct kvm *kvm, |
| 156 | static void kvm_free_assigned_irq(struct kvm *kvm, | 216 | struct kvm_assigned_dev_kernel *assigned_dev) |
| 157 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
| 158 | { | 217 | { |
| 159 | if (!irqchip_in_kernel(kvm)) | ||
| 160 | return; | ||
| 161 | |||
| 162 | kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier); | 218 | kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier); |
| 219 | assigned_dev->ack_notifier.gsi = -1; | ||
| 163 | 220 | ||
| 164 | if (assigned_dev->irq_source_id != -1) | 221 | if (assigned_dev->irq_source_id != -1) |
| 165 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); | 222 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); |
| 166 | assigned_dev->irq_source_id = -1; | 223 | assigned_dev->irq_source_id = -1; |
| 224 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); | ||
| 225 | } | ||
| 167 | 226 | ||
| 168 | if (!assigned_dev->irq_requested_type) | 227 | /* The function implicitly holds the kvm->lock mutex due to cancel_work_sync() */ |
| 169 | return; | 228 | static void deassign_host_irq(struct kvm *kvm, |
| 170 | 229 | struct kvm_assigned_dev_kernel *assigned_dev) | |
| 230 | { | ||
| 171 | /* | 231 | /* |
| 172 | * In kvm_free_device_irq, cancel_work_sync return true if: | 232 | * In kvm_free_device_irq, cancel_work_sync return true if: |
| 173 | * 1. work is scheduled, and then cancelled. | 233 | * 1. work is scheduled, and then cancelled. |
| @@ -184,17 +244,64 @@ static void kvm_free_assigned_irq(struct kvm *kvm, | |||
| 184 | * now, the kvm state is still legal for probably we also have to wait | 244 | * now, the kvm state is still legal for probably we also have to wait |
| 185 | * interrupt_work done. | 245 | * interrupt_work done. |
| 186 | */ | 246 | */ |
| 187 | disable_irq_nosync(assigned_dev->host_irq); | 247 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { |
| 188 | cancel_work_sync(&assigned_dev->interrupt_work); | 248 | int i; |
| 249 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
| 250 | disable_irq_nosync(assigned_dev-> | ||
| 251 | host_msix_entries[i].vector); | ||
| 252 | |||
| 253 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
| 254 | |||
| 255 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
| 256 | free_irq(assigned_dev->host_msix_entries[i].vector, | ||
| 257 | (void *)assigned_dev); | ||
| 258 | |||
| 259 | assigned_dev->entries_nr = 0; | ||
| 260 | kfree(assigned_dev->host_msix_entries); | ||
| 261 | kfree(assigned_dev->guest_msix_entries); | ||
| 262 | pci_disable_msix(assigned_dev->dev); | ||
| 263 | } else { | ||
| 264 | /* Deal with MSI and INTx */ | ||
| 265 | disable_irq_nosync(assigned_dev->host_irq); | ||
| 266 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
| 189 | 267 | ||
| 190 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); | 268 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); |
| 191 | 269 | ||
| 192 | if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) | 270 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) |
| 193 | pci_disable_msi(assigned_dev->dev); | 271 | pci_disable_msi(assigned_dev->dev); |
| 272 | } | ||
| 194 | 273 | ||
| 195 | assigned_dev->irq_requested_type = 0; | 274 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); |
| 196 | } | 275 | } |
| 197 | 276 | ||
| 277 | static int kvm_deassign_irq(struct kvm *kvm, | ||
| 278 | struct kvm_assigned_dev_kernel *assigned_dev, | ||
| 279 | unsigned long irq_requested_type) | ||
| 280 | { | ||
| 281 | unsigned long guest_irq_type, host_irq_type; | ||
| 282 | |||
| 283 | if (!irqchip_in_kernel(kvm)) | ||
| 284 | return -EINVAL; | ||
| 285 | /* no irq assignment to deassign */ | ||
| 286 | if (!assigned_dev->irq_requested_type) | ||
| 287 | return -ENXIO; | ||
| 288 | |||
| 289 | host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; | ||
| 290 | guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; | ||
| 291 | |||
| 292 | if (host_irq_type) | ||
| 293 | deassign_host_irq(kvm, assigned_dev); | ||
| 294 | if (guest_irq_type) | ||
| 295 | deassign_guest_irq(kvm, assigned_dev); | ||
| 296 | |||
| 297 | return 0; | ||
| 298 | } | ||
| 299 | |||
| 300 | static void kvm_free_assigned_irq(struct kvm *kvm, | ||
| 301 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
| 302 | { | ||
| 303 | kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); | ||
| 304 | } | ||
| 198 | 305 | ||
| 199 | static void kvm_free_assigned_device(struct kvm *kvm, | 306 | static void kvm_free_assigned_device(struct kvm *kvm, |
| 200 | struct kvm_assigned_dev_kernel | 307 | struct kvm_assigned_dev_kernel |
| @@ -226,190 +333,244 @@ void kvm_free_all_assigned_devices(struct kvm *kvm) | |||
| 226 | } | 333 | } |
| 227 | } | 334 | } |
| 228 | 335 | ||
| 229 | static int assigned_device_update_intx(struct kvm *kvm, | 336 | static int assigned_device_enable_host_intx(struct kvm *kvm, |
| 230 | struct kvm_assigned_dev_kernel *adev, | 337 | struct kvm_assigned_dev_kernel *dev) |
| 231 | struct kvm_assigned_irq *airq) | ||
| 232 | { | 338 | { |
| 233 | adev->guest_irq = airq->guest_irq; | 339 | dev->host_irq = dev->dev->irq; |
| 234 | adev->ack_notifier.gsi = airq->guest_irq; | 340 | /* Even though this is PCI, we don't want to use shared |
| 341 | * interrupts. Sharing host devices with guest-assigned devices | ||
| 342 | * on the same interrupt line is not a happy situation: there | ||
| 343 | * are going to be long delays in accepting, acking, etc. | ||
| 344 | */ | ||
| 345 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, | ||
| 346 | 0, "kvm_assigned_intx_device", (void *)dev)) | ||
| 347 | return -EIO; | ||
| 348 | return 0; | ||
| 349 | } | ||
| 235 | 350 | ||
| 236 | if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX) | 351 | #ifdef __KVM_HAVE_MSI |
| 237 | return 0; | 352 | static int assigned_device_enable_host_msi(struct kvm *kvm, |
| 353 | struct kvm_assigned_dev_kernel *dev) | ||
| 354 | { | ||
| 355 | int r; | ||
| 238 | 356 | ||
| 239 | if (irqchip_in_kernel(kvm)) { | 357 | if (!dev->dev->msi_enabled) { |
| 240 | if (!msi2intx && | 358 | r = pci_enable_msi(dev->dev); |
| 241 | (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) { | 359 | if (r) |
| 242 | free_irq(adev->host_irq, (void *)adev); | 360 | return r; |
| 243 | pci_disable_msi(adev->dev); | 361 | } |
| 244 | } | ||
| 245 | 362 | ||
| 246 | if (!capable(CAP_SYS_RAWIO)) | 363 | dev->host_irq = dev->dev->irq; |
| 247 | return -EPERM; | 364 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, |
| 365 | "kvm_assigned_msi_device", (void *)dev)) { | ||
| 366 | pci_disable_msi(dev->dev); | ||
| 367 | return -EIO; | ||
| 368 | } | ||
| 248 | 369 | ||
| 249 | if (airq->host_irq) | 370 | return 0; |
| 250 | adev->host_irq = airq->host_irq; | 371 | } |
| 251 | else | 372 | #endif |
| 252 | adev->host_irq = adev->dev->irq; | ||
| 253 | 373 | ||
| 254 | /* Even though this is PCI, we don't want to use shared | 374 | #ifdef __KVM_HAVE_MSIX |
| 255 | * interrupts. Sharing host devices with guest-assigned devices | 375 | static int assigned_device_enable_host_msix(struct kvm *kvm, |
| 256 | * on the same interrupt line is not a happy situation: there | 376 | struct kvm_assigned_dev_kernel *dev) |
| 257 | * are going to be long delays in accepting, acking, etc. | 377 | { |
| 258 | */ | 378 | int i, r = -EINVAL; |
| 259 | if (request_irq(adev->host_irq, kvm_assigned_dev_intr, | 379 | |
| 260 | 0, "kvm_assigned_intx_device", (void *)adev)) | 380 | /* host_msix_entries and guest_msix_entries should have been |
| 261 | return -EIO; | 381 | * initialized */ |
| 382 | if (dev->entries_nr == 0) | ||
| 383 | return r; | ||
| 384 | |||
| 385 | r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); | ||
| 386 | if (r) | ||
| 387 | return r; | ||
| 388 | |||
| 389 | for (i = 0; i < dev->entries_nr; i++) { | ||
| 390 | r = request_irq(dev->host_msix_entries[i].vector, | ||
| 391 | kvm_assigned_dev_intr, 0, | ||
| 392 | "kvm_assigned_msix_device", | ||
| 393 | (void *)dev); | ||
| 394 | /* FIXME: free requested_irq's on failure */ | ||
| 395 | if (r) | ||
| 396 | return r; | ||
| 262 | } | 397 | } |
| 263 | 398 | ||
| 264 | adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX | | ||
| 265 | KVM_ASSIGNED_DEV_HOST_INTX; | ||
| 266 | return 0; | 399 | return 0; |
| 267 | } | 400 | } |
| 268 | 401 | ||
| 269 | #ifdef CONFIG_X86 | 402 | #endif |
| 270 | static int assigned_device_update_msi(struct kvm *kvm, | 403 | |
| 271 | struct kvm_assigned_dev_kernel *adev, | 404 | static int assigned_device_enable_guest_intx(struct kvm *kvm, |
| 272 | struct kvm_assigned_irq *airq) | 405 | struct kvm_assigned_dev_kernel *dev, |
| 406 | struct kvm_assigned_irq *irq) | ||
| 273 | { | 407 | { |
| 274 | int r; | 408 | dev->guest_irq = irq->guest_irq; |
| 409 | dev->ack_notifier.gsi = irq->guest_irq; | ||
| 410 | return 0; | ||
| 411 | } | ||
| 275 | 412 | ||
| 276 | adev->guest_irq = airq->guest_irq; | 413 | #ifdef __KVM_HAVE_MSI |
| 277 | if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) { | 414 | static int assigned_device_enable_guest_msi(struct kvm *kvm, |
| 278 | /* x86 don't care upper address of guest msi message addr */ | 415 | struct kvm_assigned_dev_kernel *dev, |
| 279 | adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI; | 416 | struct kvm_assigned_irq *irq) |
| 280 | adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX; | 417 | { |
| 281 | adev->ack_notifier.gsi = -1; | 418 | dev->guest_irq = irq->guest_irq; |
| 282 | } else if (msi2intx) { | 419 | dev->ack_notifier.gsi = -1; |
| 283 | adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX; | 420 | return 0; |
| 284 | adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI; | 421 | } |
| 285 | adev->ack_notifier.gsi = airq->guest_irq; | 422 | #endif |
| 286 | } else { | 423 | #ifdef __KVM_HAVE_MSIX |
| 287 | /* | 424 | static int assigned_device_enable_guest_msix(struct kvm *kvm, |
| 288 | * Guest require to disable device MSI, we disable MSI and | 425 | struct kvm_assigned_dev_kernel *dev, |
| 289 | * re-enable INTx by default again. Notice it's only for | 426 | struct kvm_assigned_irq *irq) |
| 290 | * non-msi2intx. | 427 | { |
| 291 | */ | 428 | dev->guest_irq = irq->guest_irq; |
| 292 | assigned_device_update_intx(kvm, adev, airq); | 429 | dev->ack_notifier.gsi = -1; |
| 293 | return 0; | 430 | return 0; |
| 431 | } | ||
| 432 | #endif | ||
| 433 | |||
| 434 | static int assign_host_irq(struct kvm *kvm, | ||
| 435 | struct kvm_assigned_dev_kernel *dev, | ||
| 436 | __u32 host_irq_type) | ||
| 437 | { | ||
| 438 | int r = -EEXIST; | ||
| 439 | |||
| 440 | if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) | ||
| 441 | return r; | ||
| 442 | |||
| 443 | switch (host_irq_type) { | ||
| 444 | case KVM_DEV_IRQ_HOST_INTX: | ||
| 445 | r = assigned_device_enable_host_intx(kvm, dev); | ||
| 446 | break; | ||
| 447 | #ifdef __KVM_HAVE_MSI | ||
| 448 | case KVM_DEV_IRQ_HOST_MSI: | ||
| 449 | r = assigned_device_enable_host_msi(kvm, dev); | ||
| 450 | break; | ||
| 451 | #endif | ||
| 452 | #ifdef __KVM_HAVE_MSIX | ||
| 453 | case KVM_DEV_IRQ_HOST_MSIX: | ||
| 454 | r = assigned_device_enable_host_msix(kvm, dev); | ||
| 455 | break; | ||
| 456 | #endif | ||
| 457 | default: | ||
| 458 | r = -EINVAL; | ||
| 294 | } | 459 | } |
| 295 | 460 | ||
| 296 | if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) | 461 | if (!r) |
| 297 | return 0; | 462 | dev->irq_requested_type |= host_irq_type; |
| 298 | 463 | ||
| 299 | if (irqchip_in_kernel(kvm)) { | 464 | return r; |
| 300 | if (!msi2intx) { | 465 | } |
| 301 | if (adev->irq_requested_type & | ||
| 302 | KVM_ASSIGNED_DEV_HOST_INTX) | ||
| 303 | free_irq(adev->host_irq, (void *)adev); | ||
| 304 | 466 | ||
| 305 | r = pci_enable_msi(adev->dev); | 467 | static int assign_guest_irq(struct kvm *kvm, |
| 306 | if (r) | 468 | struct kvm_assigned_dev_kernel *dev, |
| 307 | return r; | 469 | struct kvm_assigned_irq *irq, |
| 308 | } | 470 | unsigned long guest_irq_type) |
| 471 | { | ||
| 472 | int id; | ||
| 473 | int r = -EEXIST; | ||
| 474 | |||
| 475 | if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) | ||
| 476 | return r; | ||
| 309 | 477 | ||
| 310 | adev->host_irq = adev->dev->irq; | 478 | id = kvm_request_irq_source_id(kvm); |
| 311 | if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0, | 479 | if (id < 0) |
| 312 | "kvm_assigned_msi_device", (void *)adev)) | 480 | return id; |
| 313 | return -EIO; | 481 | |
| 482 | dev->irq_source_id = id; | ||
| 483 | |||
| 484 | switch (guest_irq_type) { | ||
| 485 | case KVM_DEV_IRQ_GUEST_INTX: | ||
| 486 | r = assigned_device_enable_guest_intx(kvm, dev, irq); | ||
| 487 | break; | ||
| 488 | #ifdef __KVM_HAVE_MSI | ||
| 489 | case KVM_DEV_IRQ_GUEST_MSI: | ||
| 490 | r = assigned_device_enable_guest_msi(kvm, dev, irq); | ||
| 491 | break; | ||
| 492 | #endif | ||
| 493 | #ifdef __KVM_HAVE_MSIX | ||
| 494 | case KVM_DEV_IRQ_GUEST_MSIX: | ||
| 495 | r = assigned_device_enable_guest_msix(kvm, dev, irq); | ||
| 496 | break; | ||
| 497 | #endif | ||
| 498 | default: | ||
| 499 | r = -EINVAL; | ||
| 314 | } | 500 | } |
| 315 | 501 | ||
| 316 | if (!msi2intx) | 502 | if (!r) { |
| 317 | adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI; | 503 | dev->irq_requested_type |= guest_irq_type; |
| 504 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | ||
| 505 | } else | ||
| 506 | kvm_free_irq_source_id(kvm, dev->irq_source_id); | ||
| 318 | 507 | ||
| 319 | adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI; | 508 | return r; |
| 320 | return 0; | ||
| 321 | } | 509 | } |
| 322 | #endif | ||
| 323 | 510 | ||
| 511 | /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ | ||
| 324 | static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, | 512 | static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, |
| 325 | struct kvm_assigned_irq | 513 | struct kvm_assigned_irq *assigned_irq) |
| 326 | *assigned_irq) | ||
| 327 | { | 514 | { |
| 328 | int r = 0; | 515 | int r = -EINVAL; |
| 329 | struct kvm_assigned_dev_kernel *match; | 516 | struct kvm_assigned_dev_kernel *match; |
| 330 | u32 current_flags = 0, changed_flags; | 517 | unsigned long host_irq_type, guest_irq_type; |
| 331 | 518 | ||
| 332 | mutex_lock(&kvm->lock); | 519 | if (!capable(CAP_SYS_RAWIO)) |
| 520 | return -EPERM; | ||
| 333 | 521 | ||
| 522 | if (!irqchip_in_kernel(kvm)) | ||
| 523 | return r; | ||
| 524 | |||
| 525 | mutex_lock(&kvm->lock); | ||
| 526 | r = -ENODEV; | ||
| 334 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | 527 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, |
| 335 | assigned_irq->assigned_dev_id); | 528 | assigned_irq->assigned_dev_id); |
| 336 | if (!match) { | 529 | if (!match) |
| 337 | mutex_unlock(&kvm->lock); | 530 | goto out; |
| 338 | return -EINVAL; | ||
| 339 | } | ||
| 340 | |||
| 341 | if (!match->irq_requested_type) { | ||
| 342 | INIT_WORK(&match->interrupt_work, | ||
| 343 | kvm_assigned_dev_interrupt_work_handler); | ||
| 344 | if (irqchip_in_kernel(kvm)) { | ||
| 345 | /* Register ack nofitier */ | ||
| 346 | match->ack_notifier.gsi = -1; | ||
| 347 | match->ack_notifier.irq_acked = | ||
| 348 | kvm_assigned_dev_ack_irq; | ||
| 349 | kvm_register_irq_ack_notifier(kvm, | ||
| 350 | &match->ack_notifier); | ||
| 351 | |||
| 352 | /* Request IRQ source ID */ | ||
| 353 | r = kvm_request_irq_source_id(kvm); | ||
| 354 | if (r < 0) | ||
| 355 | goto out_release; | ||
| 356 | else | ||
| 357 | match->irq_source_id = r; | ||
| 358 | |||
| 359 | #ifdef CONFIG_X86 | ||
| 360 | /* Determine host device irq type, we can know the | ||
| 361 | * result from dev->msi_enabled */ | ||
| 362 | if (msi2intx) | ||
| 363 | pci_enable_msi(match->dev); | ||
| 364 | #endif | ||
| 365 | } | ||
| 366 | } | ||
| 367 | 531 | ||
| 368 | if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) && | 532 | host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); |
| 369 | (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI)) | 533 | guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); |
| 370 | current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI; | ||
| 371 | 534 | ||
| 372 | changed_flags = assigned_irq->flags ^ current_flags; | 535 | r = -EINVAL; |
| 536 | /* can only assign one type at a time */ | ||
| 537 | if (hweight_long(host_irq_type) > 1) | ||
| 538 | goto out; | ||
| 539 | if (hweight_long(guest_irq_type) > 1) | ||
| 540 | goto out; | ||
| 541 | if (host_irq_type == 0 && guest_irq_type == 0) | ||
| 542 | goto out; | ||
| 373 | 543 | ||
| 374 | if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) || | 544 | r = 0; |
| 375 | (msi2intx && match->dev->msi_enabled)) { | 545 | if (host_irq_type) |
| 376 | #ifdef CONFIG_X86 | 546 | r = assign_host_irq(kvm, match, host_irq_type); |
| 377 | r = assigned_device_update_msi(kvm, match, assigned_irq); | 547 | if (r) |
| 378 | if (r) { | 548 | goto out; |
| 379 | printk(KERN_WARNING "kvm: failed to enable " | ||
| 380 | "MSI device!\n"); | ||
| 381 | goto out_release; | ||
| 382 | } | ||
| 383 | #else | ||
| 384 | r = -ENOTTY; | ||
| 385 | #endif | ||
| 386 | } else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) { | ||
| 387 | /* Host device IRQ 0 means don't support INTx */ | ||
| 388 | if (!msi2intx) { | ||
| 389 | printk(KERN_WARNING | ||
| 390 | "kvm: wait device to enable MSI!\n"); | ||
| 391 | r = 0; | ||
| 392 | } else { | ||
| 393 | printk(KERN_WARNING | ||
| 394 | "kvm: failed to enable MSI device!\n"); | ||
| 395 | r = -ENOTTY; | ||
| 396 | goto out_release; | ||
| 397 | } | ||
| 398 | } else { | ||
| 399 | /* Non-sharing INTx mode */ | ||
| 400 | r = assigned_device_update_intx(kvm, match, assigned_irq); | ||
| 401 | if (r) { | ||
| 402 | printk(KERN_WARNING "kvm: failed to enable " | ||
| 403 | "INTx device!\n"); | ||
| 404 | goto out_release; | ||
| 405 | } | ||
| 406 | } | ||
| 407 | 549 | ||
| 550 | if (guest_irq_type) | ||
| 551 | r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); | ||
| 552 | out: | ||
| 408 | mutex_unlock(&kvm->lock); | 553 | mutex_unlock(&kvm->lock); |
| 409 | return r; | 554 | return r; |
| 410 | out_release: | 555 | } |
| 556 | |||
| 557 | static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, | ||
| 558 | struct kvm_assigned_irq | ||
| 559 | *assigned_irq) | ||
| 560 | { | ||
| 561 | int r = -ENODEV; | ||
| 562 | struct kvm_assigned_dev_kernel *match; | ||
| 563 | |||
| 564 | mutex_lock(&kvm->lock); | ||
| 565 | |||
| 566 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
| 567 | assigned_irq->assigned_dev_id); | ||
| 568 | if (!match) | ||
| 569 | goto out; | ||
| 570 | |||
| 571 | r = kvm_deassign_irq(kvm, match, assigned_irq->flags); | ||
| 572 | out: | ||
| 411 | mutex_unlock(&kvm->lock); | 573 | mutex_unlock(&kvm->lock); |
| 412 | kvm_free_assigned_device(kvm, match); | ||
| 413 | return r; | 574 | return r; |
| 414 | } | 575 | } |
| 415 | 576 | ||
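With the reworked kvm_vm_ioctl_assign_irq() above, userspace sets at most one KVM_DEV_IRQ_HOST_* bit and at most one KVM_DEV_IRQ_GUEST_* bit per call (and not both absent). The sketch below shows how a VMM might request host MSI delivered to the guest as MSI; the struct layout and flag values are assumptions standing in for the real <linux/kvm.h> definitions of this series.

#include <stdio.h>
#include <stdint.h>

#define KVM_DEV_IRQ_HOST_MSI   (1 << 1)
#define KVM_DEV_IRQ_GUEST_MSI  (1 << 9)

struct kvm_assigned_irq_example {
	uint32_t assigned_dev_id;
	uint32_t host_irq;
	uint32_t guest_irq;
	uint32_t flags;
};

int main(void)
{
	struct kvm_assigned_irq_example irq = {
		.assigned_dev_id = 1,              /* hypothetical device id */
		.guest_irq       = 0x23,           /* hypothetical guest GSI */
		.flags = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_MSI,
	};

	/* a real VMM would now issue:
	 *   ioctl(vm_fd, KVM_ASSIGN_DEV_IRQ, &irq);
	 * and later KVM_DEASSIGN_DEV_IRQ with the same flags. */
	printf("flags=%#x (host bits %#x, guest bits %#x)\n",
	       irq.flags, irq.flags & 0xff, irq.flags & 0xff00);
	return 0;
}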
| @@ -427,7 +588,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | |||
| 427 | assigned_dev->assigned_dev_id); | 588 | assigned_dev->assigned_dev_id); |
| 428 | if (match) { | 589 | if (match) { |
| 429 | /* device already assigned */ | 590 | /* device already assigned */ |
| 430 | r = -EINVAL; | 591 | r = -EEXIST; |
| 431 | goto out; | 592 | goto out; |
| 432 | } | 593 | } |
| 433 | 594 | ||
| @@ -464,8 +625,12 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | |||
| 464 | match->host_devfn = assigned_dev->devfn; | 625 | match->host_devfn = assigned_dev->devfn; |
| 465 | match->flags = assigned_dev->flags; | 626 | match->flags = assigned_dev->flags; |
| 466 | match->dev = dev; | 627 | match->dev = dev; |
| 628 | spin_lock_init(&match->assigned_dev_lock); | ||
| 467 | match->irq_source_id = -1; | 629 | match->irq_source_id = -1; |
| 468 | match->kvm = kvm; | 630 | match->kvm = kvm; |
| 631 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | ||
| 632 | INIT_WORK(&match->interrupt_work, | ||
| 633 | kvm_assigned_dev_interrupt_work_handler); | ||
| 469 | 634 | ||
| 470 | list_add(&match->list, &kvm->arch.assigned_dev_head); | 635 | list_add(&match->list, &kvm->arch.assigned_dev_head); |
| 471 | 636 | ||
| @@ -878,6 +1043,8 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
| 878 | #endif | 1043 | #endif |
| 879 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | 1044 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) |
| 880 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); | 1045 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); |
| 1046 | #else | ||
| 1047 | kvm_arch_flush_shadow(kvm); | ||
| 881 | #endif | 1048 | #endif |
| 882 | kvm_arch_destroy_vm(kvm); | 1049 | kvm_arch_destroy_vm(kvm); |
| 883 | mmdrop(mm); | 1050 | mmdrop(mm); |
| @@ -919,9 +1086,8 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 919 | { | 1086 | { |
| 920 | int r; | 1087 | int r; |
| 921 | gfn_t base_gfn; | 1088 | gfn_t base_gfn; |
| 922 | unsigned long npages; | 1089 | unsigned long npages, ugfn; |
| 923 | int largepages; | 1090 | unsigned long largepages, i; |
| 924 | unsigned long i; | ||
| 925 | struct kvm_memory_slot *memslot; | 1091 | struct kvm_memory_slot *memslot; |
| 926 | struct kvm_memory_slot old, new; | 1092 | struct kvm_memory_slot old, new; |
| 927 | 1093 | ||
| @@ -1010,6 +1176,14 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 1010 | new.lpage_info[0].write_count = 1; | 1176 | new.lpage_info[0].write_count = 1; |
| 1011 | if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE) | 1177 | if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE) |
| 1012 | new.lpage_info[largepages-1].write_count = 1; | 1178 | new.lpage_info[largepages-1].write_count = 1; |
| 1179 | ugfn = new.userspace_addr >> PAGE_SHIFT; | ||
| 1180 | /* | ||
| 1181 | * If the gfn and userspace address are not aligned wrt each | ||
| 1182 | * other, disable large page support for this slot | ||
| 1183 | */ | ||
| 1184 | if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE - 1)) | ||
| 1185 | for (i = 0; i < largepages; ++i) | ||
| 1186 | new.lpage_info[i].write_count = 1; | ||
| 1013 | } | 1187 | } |
| 1014 | 1188 | ||
| 1015 | /* Allocate page dirty bitmap if needed */ | 1189 | /* Allocate page dirty bitmap if needed */ |
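The new check above turns off large-page mappings for a slot whenever the guest frame number and the host user address disagree modulo the huge-page size. A small worked example with invented numbers (512 small pages per 2 MiB huge page on x86):

#include <stdio.h>

#define PAGES_PER_HPAGE 512UL

static int misaligned(unsigned long base_gfn, unsigned long ugfn)
{
	/* nonzero when gfn and host page number differ within a huge page */
	return (base_gfn ^ ugfn) & (PAGES_PER_HPAGE - 1) ? 1 : 0;
}

int main(void)
{
	/* slot at guest frame 0x100000 backed by host pages at 0x200200:
	 * both are 512-aligned, so large pages stay enabled */
	printf("aligned case   -> misaligned=%d\n", misaligned(0x100000, 0x200200));

	/* same slot backed at 0x200201: offsets differ inside the huge
	 * page, so every lpage_info entry gets write_count = 1 */
	printf("unaligned case -> misaligned=%d\n", misaligned(0x100000, 0x200201));
	return 0;
}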
| @@ -1043,8 +1217,10 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 1043 | 1217 | ||
| 1044 | kvm_free_physmem_slot(&old, npages ? &new : NULL); | 1218 | kvm_free_physmem_slot(&old, npages ? &new : NULL); |
| 1045 | /* Slot deletion case: we have to update the current slot */ | 1219 | /* Slot deletion case: we have to update the current slot */ |
| 1220 | spin_lock(&kvm->mmu_lock); | ||
| 1046 | if (!npages) | 1221 | if (!npages) |
| 1047 | *memslot = old; | 1222 | *memslot = old; |
| 1223 | spin_unlock(&kvm->mmu_lock); | ||
| 1048 | #ifdef CONFIG_DMAR | 1224 | #ifdef CONFIG_DMAR |
| 1049 | /* map the pages in iommu page table */ | 1225 | /* map the pages in iommu page table */ |
| 1050 | r = kvm_iommu_map_pages(kvm, base_gfn, npages); | 1226 | r = kvm_iommu_map_pages(kvm, base_gfn, npages); |
| @@ -1454,12 +1630,14 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
| 1454 | for (;;) { | 1630 | for (;;) { |
| 1455 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | 1631 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); |
| 1456 | 1632 | ||
| 1457 | if (kvm_cpu_has_interrupt(vcpu) || | 1633 | if ((kvm_arch_interrupt_allowed(vcpu) && |
| 1458 | kvm_cpu_has_pending_timer(vcpu) || | 1634 | kvm_cpu_has_interrupt(vcpu)) || |
| 1459 | kvm_arch_vcpu_runnable(vcpu)) { | 1635 | kvm_arch_vcpu_runnable(vcpu)) { |
| 1460 | set_bit(KVM_REQ_UNHALT, &vcpu->requests); | 1636 | set_bit(KVM_REQ_UNHALT, &vcpu->requests); |
| 1461 | break; | 1637 | break; |
| 1462 | } | 1638 | } |
| 1639 | if (kvm_cpu_has_pending_timer(vcpu)) | ||
| 1640 | break; | ||
| 1463 | if (signal_pending(current)) | 1641 | if (signal_pending(current)) |
| 1464 | break; | 1642 | break; |
| 1465 | 1643 | ||
| @@ -1593,6 +1771,88 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) | |||
| 1593 | return 0; | 1771 | return 0; |
| 1594 | } | 1772 | } |
| 1595 | 1773 | ||
| 1774 | #ifdef __KVM_HAVE_MSIX | ||
| 1775 | static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, | ||
| 1776 | struct kvm_assigned_msix_nr *entry_nr) | ||
| 1777 | { | ||
| 1778 | int r = 0; | ||
| 1779 | struct kvm_assigned_dev_kernel *adev; | ||
| 1780 | |||
| 1781 | mutex_lock(&kvm->lock); | ||
| 1782 | |||
| 1783 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
| 1784 | entry_nr->assigned_dev_id); | ||
| 1785 | if (!adev) { | ||
| 1786 | r = -EINVAL; | ||
| 1787 | goto msix_nr_out; | ||
| 1788 | } | ||
| 1789 | |||
| 1790 | if (adev->entries_nr == 0) { | ||
| 1791 | adev->entries_nr = entry_nr->entry_nr; | ||
| 1792 | if (adev->entries_nr == 0 || | ||
| 1793 | adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { | ||
| 1794 | r = -EINVAL; | ||
| 1795 | goto msix_nr_out; | ||
| 1796 | } | ||
| 1797 | |||
| 1798 | adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * | ||
| 1799 | entry_nr->entry_nr, | ||
| 1800 | GFP_KERNEL); | ||
| 1801 | if (!adev->host_msix_entries) { | ||
| 1802 | r = -ENOMEM; | ||
| 1803 | goto msix_nr_out; | ||
| 1804 | } | ||
| 1805 | adev->guest_msix_entries = kzalloc( | ||
| 1806 | sizeof(struct kvm_guest_msix_entry) * | ||
| 1807 | entry_nr->entry_nr, GFP_KERNEL); | ||
| 1808 | if (!adev->guest_msix_entries) { | ||
| 1809 | kfree(adev->host_msix_entries); | ||
| 1810 | r = -ENOMEM; | ||
| 1811 | goto msix_nr_out; | ||
| 1812 | } | ||
| 1813 | } else /* Not allowed to set MSI-X number twice */ | ||
| 1814 | r = -EINVAL; | ||
| 1815 | msix_nr_out: | ||
| 1816 | mutex_unlock(&kvm->lock); | ||
| 1817 | return r; | ||
| 1818 | } | ||
| 1819 | |||
| 1820 | static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, | ||
| 1821 | struct kvm_assigned_msix_entry *entry) | ||
| 1822 | { | ||
| 1823 | int r = 0, i; | ||
| 1824 | struct kvm_assigned_dev_kernel *adev; | ||
| 1825 | |||
| 1826 | mutex_lock(&kvm->lock); | ||
| 1827 | |||
| 1828 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
| 1829 | entry->assigned_dev_id); | ||
| 1830 | |||
| 1831 | if (!adev) { | ||
| 1832 | r = -EINVAL; | ||
| 1833 | goto msix_entry_out; | ||
| 1834 | } | ||
| 1835 | |||
| 1836 | for (i = 0; i < adev->entries_nr; i++) | ||
| 1837 | if (adev->guest_msix_entries[i].vector == 0 || | ||
| 1838 | adev->guest_msix_entries[i].entry == entry->entry) { | ||
| 1839 | adev->guest_msix_entries[i].entry = entry->entry; | ||
| 1840 | adev->guest_msix_entries[i].vector = entry->gsi; | ||
| 1841 | adev->host_msix_entries[i].entry = entry->entry; | ||
| 1842 | break; | ||
| 1843 | } | ||
| 1844 | if (i == adev->entries_nr) { | ||
| 1845 | r = -ENOSPC; | ||
| 1846 | goto msix_entry_out; | ||
| 1847 | } | ||
| 1848 | |||
| 1849 | msix_entry_out: | ||
| 1850 | mutex_unlock(&kvm->lock); | ||
| 1851 | |||
| 1852 | return r; | ||
| 1853 | } | ||
| 1854 | #endif | ||
| 1855 | |||
| 1596 | static long kvm_vcpu_ioctl(struct file *filp, | 1856 | static long kvm_vcpu_ioctl(struct file *filp, |
| 1597 | unsigned int ioctl, unsigned long arg) | 1857 | unsigned int ioctl, unsigned long arg) |
| 1598 | { | 1858 | { |
| @@ -1864,6 +2124,11 @@ static long kvm_vm_ioctl(struct file *filp, | |||
| 1864 | break; | 2124 | break; |
| 1865 | } | 2125 | } |
| 1866 | case KVM_ASSIGN_IRQ: { | 2126 | case KVM_ASSIGN_IRQ: { |
| 2127 | r = -EOPNOTSUPP; | ||
| 2128 | break; | ||
| 2129 | } | ||
| 2130 | #ifdef KVM_CAP_ASSIGN_DEV_IRQ | ||
| 2131 | case KVM_ASSIGN_DEV_IRQ: { | ||
| 1867 | struct kvm_assigned_irq assigned_irq; | 2132 | struct kvm_assigned_irq assigned_irq; |
| 1868 | 2133 | ||
| 1869 | r = -EFAULT; | 2134 | r = -EFAULT; |
| @@ -1874,6 +2139,18 @@ static long kvm_vm_ioctl(struct file *filp, | |||
| 1874 | goto out; | 2139 | goto out; |
| 1875 | break; | 2140 | break; |
| 1876 | } | 2141 | } |
| 2142 | case KVM_DEASSIGN_DEV_IRQ: { | ||
| 2143 | struct kvm_assigned_irq assigned_irq; | ||
| 2144 | |||
| 2145 | r = -EFAULT; | ||
| 2146 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
| 2147 | goto out; | ||
| 2148 | r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); | ||
| 2149 | if (r) | ||
| 2150 | goto out; | ||
| 2151 | break; | ||
| 2152 | } | ||
| 2153 | #endif | ||
| 1877 | #endif | 2154 | #endif |
| 1878 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT | 2155 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT |
| 1879 | case KVM_DEASSIGN_PCI_DEVICE: { | 2156 | case KVM_DEASSIGN_PCI_DEVICE: { |
| @@ -1917,7 +2194,29 @@ static long kvm_vm_ioctl(struct file *filp, | |||
| 1917 | vfree(entries); | 2194 | vfree(entries); |
| 1918 | break; | 2195 | break; |
| 1919 | } | 2196 | } |
| 2197 | #ifdef __KVM_HAVE_MSIX | ||
| 2198 | case KVM_ASSIGN_SET_MSIX_NR: { | ||
| 2199 | struct kvm_assigned_msix_nr entry_nr; | ||
| 2200 | r = -EFAULT; | ||
| 2201 | if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) | ||
| 2202 | goto out; | ||
| 2203 | r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); | ||
| 2204 | if (r) | ||
| 2205 | goto out; | ||
| 2206 | break; | ||
| 2207 | } | ||
| 2208 | case KVM_ASSIGN_SET_MSIX_ENTRY: { | ||
| 2209 | struct kvm_assigned_msix_entry entry; | ||
| 2210 | r = -EFAULT; | ||
| 2211 | if (copy_from_user(&entry, argp, sizeof entry)) | ||
| 2212 | goto out; | ||
| 2213 | r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); | ||
| 2214 | if (r) | ||
| 2215 | goto out; | ||
| 2216 | break; | ||
| 2217 | } | ||
| 1920 | #endif | 2218 | #endif |
| 2219 | #endif /* KVM_CAP_IRQ_ROUTING */ | ||
| 1921 | default: | 2220 | default: |
| 1922 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); | 2221 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); |
| 1923 | } | 2222 | } |
| @@ -2112,15 +2411,15 @@ EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot); | |||
| 2112 | static int kvm_reboot(struct notifier_block *notifier, unsigned long val, | 2411 | static int kvm_reboot(struct notifier_block *notifier, unsigned long val, |
| 2113 | void *v) | 2412 | void *v) |
| 2114 | { | 2413 | { |
| 2115 | if (val == SYS_RESTART) { | 2414 | /* |
| 2116 | /* | 2415 | * Some (well, at least mine) BIOSes hang on reboot if |
| 2117 | * Some (well, at least mine) BIOSes hang on reboot if | 2416 | * in vmx root mode. |
| 2118 | * in vmx root mode. | 2417 | * |
| 2119 | */ | 2418 | * And Intel TXT requires VMX off for all CPUs when the system shuts down. |
| 2120 | printk(KERN_INFO "kvm: exiting hardware virtualization\n"); | 2419 | */ |
| 2121 | kvm_rebooting = true; | 2420 | printk(KERN_INFO "kvm: exiting hardware virtualization\n"); |
| 2122 | on_each_cpu(hardware_disable, NULL, 1); | 2421 | kvm_rebooting = true; |
| 2123 | } | 2422 | on_each_cpu(hardware_disable, NULL, 1); |
| 2124 | return NOTIFY_OK; | 2423 | return NOTIFY_OK; |
| 2125 | } | 2424 | } |
| 2126 | 2425 | ||
| @@ -2354,9 +2653,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size, | |||
| 2354 | 2653 | ||
| 2355 | kvm_preempt_ops.sched_in = kvm_sched_in; | 2654 | kvm_preempt_ops.sched_in = kvm_sched_in; |
| 2356 | kvm_preempt_ops.sched_out = kvm_sched_out; | 2655 | kvm_preempt_ops.sched_out = kvm_sched_out; |
| 2357 | #ifndef CONFIG_X86 | ||
| 2358 | msi2intx = 0; | ||
| 2359 | #endif | ||
| 2360 | 2656 | ||
| 2361 | return 0; | 2657 | return 0; |
| 2362 | 2658 | ||
