diff options
| -rw-r--r-- | arch/powerpc/include/asm/kvm_host.h | 2 | ||||
| -rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_hv.c | 47 | ||||
| -rw-r--r-- | arch/powerpc/kvm/e500_tlb.c | 2 | ||||
| -rw-r--r-- | arch/s390/kvm/Kconfig | 1 | ||||
| -rw-r--r-- | arch/x86/include/asm/kvm_host.h | 3 | ||||
| -rw-r--r-- | arch/x86/kvm/Kconfig | 1 | ||||
| -rw-r--r-- | arch/x86/kvm/cpuid.c | 2 | ||||
| -rw-r--r-- | arch/x86/kvm/emulate.c | 10 | ||||
| -rw-r--r-- | arch/x86/kvm/irq.h | 2 | ||||
| -rw-r--r-- | arch/x86/kvm/lapic.c | 4 | ||||
| -rw-r--r-- | arch/x86/kvm/mmu.c | 117 | ||||
| -rw-r--r-- | arch/x86/kvm/pmu.c | 2 | ||||
| -rw-r--r-- | arch/x86/kvm/svm.c | 4 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx.c | 6 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.c | 26 | ||||
| -rw-r--r-- | include/linux/kvm_host.h | 62 | ||||
| -rw-r--r-- | virt/kvm/Kconfig | 3 | ||||
| -rw-r--r-- | virt/kvm/iommu.c | 10 | ||||
| -rw-r--r-- | virt/kvm/irq_comm.c | 4 | ||||
| -rw-r--r-- | virt/kvm/kvm_main.c | 84 |
20 files changed, 277 insertions, 115 deletions
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 50ea12fd7bf5..572ad0141268 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
| @@ -52,6 +52,8 @@ | |||
| 52 | 52 | ||
| 53 | struct kvm; | 53 | struct kvm; |
| 54 | extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); | 54 | extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); |
| 55 | extern int kvm_unmap_hva_range(struct kvm *kvm, | ||
| 56 | unsigned long start, unsigned long end); | ||
| 55 | extern int kvm_age_hva(struct kvm *kvm, unsigned long hva); | 57 | extern int kvm_age_hva(struct kvm *kvm, unsigned long hva); |
| 56 | extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); | 58 | extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); |
| 57 | extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); | 59 | extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); |
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index d03eb6f7b058..3c635c0616b0 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c | |||
| @@ -756,9 +756,12 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 756 | goto out_put; | 756 | goto out_put; |
| 757 | } | 757 | } |
| 758 | 758 | ||
| 759 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | 759 | static int kvm_handle_hva_range(struct kvm *kvm, |
| 760 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, | 760 | unsigned long start, |
| 761 | unsigned long gfn)) | 761 | unsigned long end, |
| 762 | int (*handler)(struct kvm *kvm, | ||
| 763 | unsigned long *rmapp, | ||
| 764 | unsigned long gfn)) | ||
| 762 | { | 765 | { |
| 763 | int ret; | 766 | int ret; |
| 764 | int retval = 0; | 767 | int retval = 0; |
| @@ -767,15 +770,25 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
| 767 | 770 | ||
| 768 | slots = kvm_memslots(kvm); | 771 | slots = kvm_memslots(kvm); |
| 769 | kvm_for_each_memslot(memslot, slots) { | 772 | kvm_for_each_memslot(memslot, slots) { |
| 770 | unsigned long start = memslot->userspace_addr; | 773 | unsigned long hva_start, hva_end; |
| 771 | unsigned long end; | 774 | gfn_t gfn, gfn_end; |
| 772 | 775 | ||
| 773 | end = start + (memslot->npages << PAGE_SHIFT); | 776 | hva_start = max(start, memslot->userspace_addr); |
| 774 | if (hva >= start && hva < end) { | 777 | hva_end = min(end, memslot->userspace_addr + |
| 775 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; | 778 | (memslot->npages << PAGE_SHIFT)); |
| 779 | if (hva_start >= hva_end) | ||
| 780 | continue; | ||
| 781 | /* | ||
| 782 | * {gfn(page) | page intersects with [hva_start, hva_end)} = | ||
| 783 | * {gfn, gfn+1, ..., gfn_end-1}. | ||
| 784 | */ | ||
| 785 | gfn = hva_to_gfn_memslot(hva_start, memslot); | ||
| 786 | gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); | ||
| 787 | |||
| 788 | for (; gfn < gfn_end; ++gfn) { | ||
| 789 | gfn_t gfn_offset = gfn - memslot->base_gfn; | ||
| 776 | 790 | ||
| 777 | ret = handler(kvm, &memslot->rmap[gfn_offset], | 791 | ret = handler(kvm, &memslot->rmap[gfn_offset], gfn); |
| 778 | memslot->base_gfn + gfn_offset); | ||
| 779 | retval |= ret; | 792 | retval |= ret; |
| 780 | } | 793 | } |
| 781 | } | 794 | } |
| @@ -783,6 +796,13 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
| 783 | return retval; | 796 | return retval; |
| 784 | } | 797 | } |
| 785 | 798 | ||
| 799 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | ||
| 800 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, | ||
| 801 | unsigned long gfn)) | ||
| 802 | { | ||
| 803 | return kvm_handle_hva_range(kvm, hva, hva + 1, handler); | ||
| 804 | } | ||
| 805 | |||
| 786 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | 806 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, |
| 787 | unsigned long gfn) | 807 | unsigned long gfn) |
| 788 | { | 808 | { |
| @@ -850,6 +870,13 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | |||
| 850 | return 0; | 870 | return 0; |
| 851 | } | 871 | } |
| 852 | 872 | ||
| 873 | int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) | ||
| 874 | { | ||
| 875 | if (kvm->arch.using_mmu_notifiers) | ||
| 876 | kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp); | ||
| 877 | return 0; | ||
| 878 | } | ||
| 879 | |||
| 853 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | 880 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, |
| 854 | unsigned long gfn) | 881 | unsigned long gfn) |
| 855 | { | 882 | { |
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index c510fc961302..c8f6c5826742 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c | |||
| @@ -520,7 +520,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, | |||
| 520 | 520 | ||
| 521 | if (likely(!pfnmap)) { | 521 | if (likely(!pfnmap)) { |
| 522 | unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); | 522 | unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); |
| 523 | pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn); | 523 | pfn = gfn_to_pfn_memslot(slot, gfn); |
| 524 | if (is_error_pfn(pfn)) { | 524 | if (is_error_pfn(pfn)) { |
| 525 | printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", | 525 | printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", |
| 526 | (long)gfn); | 526 | (long)gfn); |
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 78eb9847008f..a6e2677724e1 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig | |||
| @@ -21,6 +21,7 @@ config KVM | |||
| 21 | depends on HAVE_KVM && EXPERIMENTAL | 21 | depends on HAVE_KVM && EXPERIMENTAL |
| 22 | select PREEMPT_NOTIFIERS | 22 | select PREEMPT_NOTIFIERS |
| 23 | select ANON_INODES | 23 | select ANON_INODES |
| 24 | select HAVE_KVM_CPU_RELAX_INTERCEPT | ||
| 24 | ---help--- | 25 | ---help--- |
| 25 | Support hosting paravirtualized guest machines using the SIE | 26 | Support hosting paravirtualized guest machines using the SIE |
| 26 | virtualization capability on the mainframe. This should work | 27 | virtualization capability on the mainframe. This should work |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 09155d64cf7e..48e713188469 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
| @@ -500,11 +500,11 @@ struct kvm_vcpu_arch { | |||
| 500 | }; | 500 | }; |
| 501 | 501 | ||
| 502 | struct kvm_lpage_info { | 502 | struct kvm_lpage_info { |
| 503 | unsigned long rmap_pde; | ||
| 504 | int write_count; | 503 | int write_count; |
| 505 | }; | 504 | }; |
| 506 | 505 | ||
| 507 | struct kvm_arch_memory_slot { | 506 | struct kvm_arch_memory_slot { |
| 507 | unsigned long *rmap_pde[KVM_NR_PAGE_SIZES - 1]; | ||
| 508 | struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; | 508 | struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; |
| 509 | }; | 509 | }; |
| 510 | 510 | ||
| @@ -957,6 +957,7 @@ extern bool kvm_rebooting; | |||
| 957 | 957 | ||
| 958 | #define KVM_ARCH_WANT_MMU_NOTIFIER | 958 | #define KVM_ARCH_WANT_MMU_NOTIFIER |
| 959 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); | 959 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); |
| 960 | int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); | ||
| 960 | int kvm_age_hva(struct kvm *kvm, unsigned long hva); | 961 | int kvm_age_hva(struct kvm *kvm, unsigned long hva); |
| 961 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); | 962 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); |
| 962 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); | 963 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index a28f338843ea..45c044f0fff7 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
| @@ -37,6 +37,7 @@ config KVM | |||
| 37 | select TASK_DELAY_ACCT | 37 | select TASK_DELAY_ACCT |
| 38 | select PERF_EVENTS | 38 | select PERF_EVENTS |
| 39 | select HAVE_KVM_MSI | 39 | select HAVE_KVM_MSI |
| 40 | select HAVE_KVM_CPU_RELAX_INTERCEPT | ||
| 40 | ---help--- | 41 | ---help--- |
| 41 | Support hosting fully virtualized guest machines using hardware | 42 | Support hosting fully virtualized guest machines using hardware |
| 42 | virtualization extensions. You will need a fairly recent | 43 | virtualization extensions. You will need a fairly recent |
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0595f1397b7c..b496da684bd6 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
| @@ -316,7 +316,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
| 316 | } | 316 | } |
| 317 | case 7: { | 317 | case 7: { |
| 318 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | 318 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; |
| 319 | /* Mask ebx against host capbability word 9 */ | 319 | /* Mask ebx against host capability word 9 */ |
| 320 | if (index == 0) { | 320 | if (index == 0) { |
| 321 | entry->ebx &= kvm_supported_word9_x86_features; | 321 | entry->ebx &= kvm_supported_word9_x86_features; |
| 322 | cpuid_mask(&entry->ebx, 9); | 322 | cpuid_mask(&entry->ebx, 9); |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 97d9a9914ba8..85b611e13e84 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
| @@ -642,7 +642,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, | |||
| 642 | if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) | 642 | if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) |
| 643 | goto bad; | 643 | goto bad; |
| 644 | } else { | 644 | } else { |
| 645 | /* exapand-down segment */ | 645 | /* expand-down segment */ |
| 646 | if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim) | 646 | if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim) |
| 647 | goto bad; | 647 | goto bad; |
| 648 | lim = desc.d ? 0xffffffff : 0xffff; | 648 | lim = desc.d ? 0xffffffff : 0xffff; |
| @@ -1383,7 +1383,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
| 1383 | err_code = selector & 0xfffc; | 1383 | err_code = selector & 0xfffc; |
| 1384 | err_vec = GP_VECTOR; | 1384 | err_vec = GP_VECTOR; |
| 1385 | 1385 | ||
| 1386 | /* can't load system descriptor into segment selecor */ | 1386 | /* can't load system descriptor into segment selector */ |
| 1387 | if (seg <= VCPU_SREG_GS && !seg_desc.s) | 1387 | if (seg <= VCPU_SREG_GS && !seg_desc.s) |
| 1388 | goto exception; | 1388 | goto exception; |
| 1389 | 1389 | ||
| @@ -2398,7 +2398,7 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, | |||
| 2398 | set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); | 2398 | set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); |
| 2399 | 2399 | ||
| 2400 | /* | 2400 | /* |
| 2401 | * Now load segment descriptors. If fault happenes at this stage | 2401 | * Now load segment descriptors. If fault happens at this stage |
| 2402 | * it is handled in a context of new task | 2402 | * it is handled in a context of new task |
| 2403 | */ | 2403 | */ |
| 2404 | ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR); | 2404 | ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR); |
| @@ -2640,7 +2640,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
| 2640 | * | 2640 | * |
| 2641 | * 1. jmp/call/int to task gate: Check against DPL of the task gate | 2641 | * 1. jmp/call/int to task gate: Check against DPL of the task gate |
| 2642 | * 2. Exception/IRQ/iret: No check is performed | 2642 | * 2. Exception/IRQ/iret: No check is performed |
| 2643 | * 3. jmp/call to TSS: Check agains DPL of the TSS | 2643 | * 3. jmp/call to TSS: Check against DPL of the TSS |
| 2644 | */ | 2644 | */ |
| 2645 | if (reason == TASK_SWITCH_GATE) { | 2645 | if (reason == TASK_SWITCH_GATE) { |
| 2646 | if (idt_index != -1) { | 2646 | if (idt_index != -1) { |
| @@ -2681,7 +2681,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
| 2681 | ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT; | 2681 | ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT; |
| 2682 | 2682 | ||
| 2683 | /* set back link to prev task only if NT bit is set in eflags | 2683 | /* set back link to prev task only if NT bit is set in eflags |
| 2684 | note that old_tss_sel is not used afetr this point */ | 2684 | note that old_tss_sel is not used after this point */ |
| 2685 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | 2685 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) |
| 2686 | old_tss_sel = 0xffff; | 2686 | old_tss_sel = 0xffff; |
| 2687 | 2687 | ||
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 2086f2bfba33..2d03568e9498 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
| @@ -70,7 +70,7 @@ struct kvm_pic { | |||
| 70 | struct kvm_io_device dev_slave; | 70 | struct kvm_io_device dev_slave; |
| 71 | struct kvm_io_device dev_eclr; | 71 | struct kvm_io_device dev_eclr; |
| 72 | void (*ack_notifier)(void *opaque, int irq); | 72 | void (*ack_notifier)(void *opaque, int irq); |
| 73 | unsigned long irq_states[16]; | 73 | unsigned long irq_states[PIC_NUM_PINS]; |
| 74 | }; | 74 | }; |
| 75 | 75 | ||
| 76 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); | 76 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ce878788a39f..fff7173f6a71 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
| @@ -719,7 +719,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len, | |||
| 719 | { | 719 | { |
| 720 | unsigned char alignment = offset & 0xf; | 720 | unsigned char alignment = offset & 0xf; |
| 721 | u32 result; | 721 | u32 result; |
| 722 | /* this bitmask has a bit cleared for each reserver register */ | 722 | /* this bitmask has a bit cleared for each reserved register */ |
| 723 | static const u64 rmask = 0x43ff01ffffffe70cULL; | 723 | static const u64 rmask = 0x43ff01ffffffe70cULL; |
| 724 | 724 | ||
| 725 | if ((alignment + len) > 4) { | 725 | if ((alignment + len) > 4) { |
| @@ -792,7 +792,7 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
| 792 | atomic_set(&apic->lapic_timer.pending, 0); | 792 | atomic_set(&apic->lapic_timer.pending, 0); |
| 793 | 793 | ||
| 794 | if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { | 794 | if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { |
| 795 | /* lapic timer in oneshot or peroidic mode */ | 795 | /* lapic timer in oneshot or periodic mode */ |
| 796 | now = apic->lapic_timer.timer.base->get_time(); | 796 | now = apic->lapic_timer.timer.base->get_time(); |
| 797 | apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) | 797 | apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) |
| 798 | * APIC_BUS_CYCLE_NS * apic->divide_count; | 798 | * APIC_BUS_CYCLE_NS * apic->divide_count; |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 01ca00423938..241993443599 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -556,6 +556,14 @@ static int mmu_spte_clear_track_bits(u64 *sptep) | |||
| 556 | return 0; | 556 | return 0; |
| 557 | 557 | ||
| 558 | pfn = spte_to_pfn(old_spte); | 558 | pfn = spte_to_pfn(old_spte); |
| 559 | |||
| 560 | /* | ||
| 561 | * KVM does not hold the refcount of the page used by | ||
| 562 | * kvm mmu, before reclaiming the page, we should | ||
| 563 | * unmap it from mmu first. | ||
| 564 | */ | ||
| 565 | WARN_ON(!kvm_is_mmio_pfn(pfn) && !page_count(pfn_to_page(pfn))); | ||
| 566 | |||
| 559 | if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) | 567 | if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) |
| 560 | kvm_set_pfn_accessed(pfn); | 568 | kvm_set_pfn_accessed(pfn); |
| 561 | if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask)) | 569 | if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask)) |
| @@ -960,13 +968,13 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) | |||
| 960 | static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, | 968 | static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, |
| 961 | struct kvm_memory_slot *slot) | 969 | struct kvm_memory_slot *slot) |
| 962 | { | 970 | { |
| 963 | struct kvm_lpage_info *linfo; | 971 | unsigned long idx; |
| 964 | 972 | ||
| 965 | if (likely(level == PT_PAGE_TABLE_LEVEL)) | 973 | if (likely(level == PT_PAGE_TABLE_LEVEL)) |
| 966 | return &slot->rmap[gfn - slot->base_gfn]; | 974 | return &slot->rmap[gfn - slot->base_gfn]; |
| 967 | 975 | ||
| 968 | linfo = lpage_info_slot(gfn, slot, level); | 976 | idx = gfn_to_index(gfn, slot->base_gfn, level); |
| 969 | return &linfo->rmap_pde; | 977 | return &slot->arch.rmap_pde[level - PT_DIRECTORY_LEVEL][idx]; |
| 970 | } | 978 | } |
| 971 | 979 | ||
| 972 | /* | 980 | /* |
| @@ -1200,7 +1208,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
| 1200 | } | 1208 | } |
| 1201 | 1209 | ||
| 1202 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1210 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, |
| 1203 | unsigned long data) | 1211 | struct kvm_memory_slot *slot, unsigned long data) |
| 1204 | { | 1212 | { |
| 1205 | u64 *sptep; | 1213 | u64 *sptep; |
| 1206 | struct rmap_iterator iter; | 1214 | struct rmap_iterator iter; |
| @@ -1218,7 +1226,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
| 1218 | } | 1226 | } |
| 1219 | 1227 | ||
| 1220 | static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1228 | static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, |
| 1221 | unsigned long data) | 1229 | struct kvm_memory_slot *slot, unsigned long data) |
| 1222 | { | 1230 | { |
| 1223 | u64 *sptep; | 1231 | u64 *sptep; |
| 1224 | struct rmap_iterator iter; | 1232 | struct rmap_iterator iter; |
| @@ -1259,43 +1267,67 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
| 1259 | return 0; | 1267 | return 0; |
| 1260 | } | 1268 | } |
| 1261 | 1269 | ||
| 1262 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | 1270 | static int kvm_handle_hva_range(struct kvm *kvm, |
| 1263 | unsigned long data, | 1271 | unsigned long start, |
| 1264 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, | 1272 | unsigned long end, |
| 1265 | unsigned long data)) | 1273 | unsigned long data, |
| 1274 | int (*handler)(struct kvm *kvm, | ||
| 1275 | unsigned long *rmapp, | ||
| 1276 | struct kvm_memory_slot *slot, | ||
| 1277 | unsigned long data)) | ||
| 1266 | { | 1278 | { |
| 1267 | int j; | 1279 | int j; |
| 1268 | int ret; | 1280 | int ret = 0; |
| 1269 | int retval = 0; | ||
| 1270 | struct kvm_memslots *slots; | 1281 | struct kvm_memslots *slots; |
| 1271 | struct kvm_memory_slot *memslot; | 1282 | struct kvm_memory_slot *memslot; |
| 1272 | 1283 | ||
| 1273 | slots = kvm_memslots(kvm); | 1284 | slots = kvm_memslots(kvm); |
| 1274 | 1285 | ||
| 1275 | kvm_for_each_memslot(memslot, slots) { | 1286 | kvm_for_each_memslot(memslot, slots) { |
| 1276 | unsigned long start = memslot->userspace_addr; | 1287 | unsigned long hva_start, hva_end; |
| 1277 | unsigned long end; | 1288 | gfn_t gfn_start, gfn_end; |
| 1278 | 1289 | ||
| 1279 | end = start + (memslot->npages << PAGE_SHIFT); | 1290 | hva_start = max(start, memslot->userspace_addr); |
| 1280 | if (hva >= start && hva < end) { | 1291 | hva_end = min(end, memslot->userspace_addr + |
| 1281 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; | 1292 | (memslot->npages << PAGE_SHIFT)); |
| 1282 | gfn_t gfn = memslot->base_gfn + gfn_offset; | 1293 | if (hva_start >= hva_end) |
| 1294 | continue; | ||
| 1295 | /* | ||
| 1296 | * {gfn(page) | page intersects with [hva_start, hva_end)} = | ||
| 1297 | * {gfn_start, gfn_start+1, ..., gfn_end-1}. | ||
| 1298 | */ | ||
| 1299 | gfn_start = hva_to_gfn_memslot(hva_start, memslot); | ||
| 1300 | gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); | ||
| 1283 | 1301 | ||
| 1284 | ret = handler(kvm, &memslot->rmap[gfn_offset], data); | 1302 | for (j = PT_PAGE_TABLE_LEVEL; |
| 1303 | j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) { | ||
| 1304 | unsigned long idx, idx_end; | ||
| 1305 | unsigned long *rmapp; | ||
| 1285 | 1306 | ||
| 1286 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { | 1307 | /* |
| 1287 | struct kvm_lpage_info *linfo; | 1308 | * {idx(page_j) | page_j intersects with |
| 1309 | * [hva_start, hva_end)} = {idx, idx+1, ..., idx_end}. | ||
| 1310 | */ | ||
| 1311 | idx = gfn_to_index(gfn_start, memslot->base_gfn, j); | ||
| 1312 | idx_end = gfn_to_index(gfn_end - 1, memslot->base_gfn, j); | ||
| 1288 | 1313 | ||
| 1289 | linfo = lpage_info_slot(gfn, memslot, | 1314 | rmapp = __gfn_to_rmap(gfn_start, j, memslot); |
| 1290 | PT_DIRECTORY_LEVEL + j); | 1315 | |
| 1291 | ret |= handler(kvm, &linfo->rmap_pde, data); | 1316 | for (; idx <= idx_end; ++idx) |
| 1292 | } | 1317 | ret |= handler(kvm, rmapp++, memslot, data); |
| 1293 | trace_kvm_age_page(hva, memslot, ret); | ||
| 1294 | retval |= ret; | ||
| 1295 | } | 1318 | } |
| 1296 | } | 1319 | } |
| 1297 | 1320 | ||
| 1298 | return retval; | 1321 | return ret; |
| 1322 | } | ||
| 1323 | |||
| 1324 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | ||
| 1325 | unsigned long data, | ||
| 1326 | int (*handler)(struct kvm *kvm, unsigned long *rmapp, | ||
| 1327 | struct kvm_memory_slot *slot, | ||
| 1328 | unsigned long data)) | ||
| 1329 | { | ||
| 1330 | return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); | ||
| 1299 | } | 1331 | } |
| 1300 | 1332 | ||
| 1301 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | 1333 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) |
| @@ -1303,13 +1335,18 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | |||
| 1303 | return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp); | 1335 | return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp); |
| 1304 | } | 1336 | } |
| 1305 | 1337 | ||
| 1338 | int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) | ||
| 1339 | { | ||
| 1340 | return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp); | ||
| 1341 | } | ||
| 1342 | |||
| 1306 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) | 1343 | void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) |
| 1307 | { | 1344 | { |
| 1308 | kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); | 1345 | kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); |
| 1309 | } | 1346 | } |
| 1310 | 1347 | ||
| 1311 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1348 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, |
| 1312 | unsigned long data) | 1349 | struct kvm_memory_slot *slot, unsigned long data) |
| 1313 | { | 1350 | { |
| 1314 | u64 *sptep; | 1351 | u64 *sptep; |
| 1315 | struct rmap_iterator uninitialized_var(iter); | 1352 | struct rmap_iterator uninitialized_var(iter); |
| @@ -1323,8 +1360,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
| 1323 | * This has some overhead, but not as much as the cost of swapping | 1360 | * This has some overhead, but not as much as the cost of swapping |
| 1324 | * out actively used pages or breaking up actively used hugepages. | 1361 | * out actively used pages or breaking up actively used hugepages. |
| 1325 | */ | 1362 | */ |
| 1326 | if (!shadow_accessed_mask) | 1363 | if (!shadow_accessed_mask) { |
| 1327 | return kvm_unmap_rmapp(kvm, rmapp, data); | 1364 | young = kvm_unmap_rmapp(kvm, rmapp, slot, data); |
| 1365 | goto out; | ||
| 1366 | } | ||
| 1328 | 1367 | ||
| 1329 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; | 1368 | for (sptep = rmap_get_first(*rmapp, &iter); sptep; |
| 1330 | sptep = rmap_get_next(&iter)) { | 1369 | sptep = rmap_get_next(&iter)) { |
| @@ -1336,12 +1375,14 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
| 1336 | (unsigned long *)sptep); | 1375 | (unsigned long *)sptep); |
| 1337 | } | 1376 | } |
| 1338 | } | 1377 | } |
| 1339 | 1378 | out: | |
| 1379 | /* @data has hva passed to kvm_age_hva(). */ | ||
| 1380 | trace_kvm_age_page(data, slot, young); | ||
| 1340 | return young; | 1381 | return young; |
| 1341 | } | 1382 | } |
| 1342 | 1383 | ||
| 1343 | static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | 1384 | static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, |
| 1344 | unsigned long data) | 1385 | struct kvm_memory_slot *slot, unsigned long data) |
| 1345 | { | 1386 | { |
| 1346 | u64 *sptep; | 1387 | u64 *sptep; |
| 1347 | struct rmap_iterator iter; | 1388 | struct rmap_iterator iter; |
| @@ -1379,13 +1420,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) | |||
| 1379 | 1420 | ||
| 1380 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); | 1421 | rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); |
| 1381 | 1422 | ||
| 1382 | kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); | 1423 | kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0); |
| 1383 | kvm_flush_remote_tlbs(vcpu->kvm); | 1424 | kvm_flush_remote_tlbs(vcpu->kvm); |
| 1384 | } | 1425 | } |
| 1385 | 1426 | ||
| 1386 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) | 1427 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) |
| 1387 | { | 1428 | { |
| 1388 | return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); | 1429 | return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp); |
| 1389 | } | 1430 | } |
| 1390 | 1431 | ||
| 1391 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) | 1432 | int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) |
| @@ -2472,14 +2513,12 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
| 2472 | unsigned long hva; | 2513 | unsigned long hva; |
| 2473 | 2514 | ||
| 2474 | slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log); | 2515 | slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log); |
| 2475 | if (!slot) { | 2516 | if (!slot) |
| 2476 | get_page(fault_page); | 2517 | return get_fault_pfn(); |
| 2477 | return page_to_pfn(fault_page); | ||
| 2478 | } | ||
| 2479 | 2518 | ||
| 2480 | hva = gfn_to_hva_memslot(slot, gfn); | 2519 | hva = gfn_to_hva_memslot(slot, gfn); |
| 2481 | 2520 | ||
| 2482 | return hva_to_pfn_atomic(vcpu->kvm, hva); | 2521 | return hva_to_pfn_atomic(hva); |
| 2483 | } | 2522 | } |
| 2484 | 2523 | ||
| 2485 | static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, | 2524 | static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, |
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 9b7ec1150ab0..cfc258a6bf97 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Kernel-based Virtual Machine -- Performane Monitoring Unit support | 2 | * Kernel-based Virtual Machine -- Performance Monitoring Unit support |
| 3 | * | 3 | * |
| 4 | * Copyright 2011 Red Hat, Inc. and/or its affiliates. | 4 | * Copyright 2011 Red Hat, Inc. and/or its affiliates. |
| 5 | * | 5 | * |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index baead950d6c8..687d0c30e559 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -2063,7 +2063,7 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm) | |||
| 2063 | if (svm->nested.intercept & 1ULL) { | 2063 | if (svm->nested.intercept & 1ULL) { |
| 2064 | /* | 2064 | /* |
| 2065 | * The #vmexit can't be emulated here directly because this | 2065 | * The #vmexit can't be emulated here directly because this |
| 2066 | * code path runs with irqs and preemtion disabled. A | 2066 | * code path runs with irqs and preemption disabled. A |
| 2067 | * #vmexit emulation might sleep. Only signal request for | 2067 | * #vmexit emulation might sleep. Only signal request for |
| 2068 | * the #vmexit here. | 2068 | * the #vmexit here. |
| 2069 | */ | 2069 | */ |
| @@ -2409,7 +2409,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) | |||
| 2409 | { | 2409 | { |
| 2410 | /* | 2410 | /* |
| 2411 | * This function merges the msr permission bitmaps of kvm and the | 2411 | * This function merges the msr permission bitmaps of kvm and the |
| 2412 | * nested vmcb. It is omptimized in that it only merges the parts where | 2412 | * nested vmcb. It is optimized in that it only merges the parts where |
| 2413 | * the kvm msr permission bitmap may contain zero bits | 2413 | * the kvm msr permission bitmap may contain zero bits |
| 2414 | */ | 2414 | */ |
| 2415 | int i; | 2415 | int i; |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c39b60707e02..2300e5319ed9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -1343,7 +1343,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) | |||
| 1343 | guest_efer = vmx->vcpu.arch.efer; | 1343 | guest_efer = vmx->vcpu.arch.efer; |
| 1344 | 1344 | ||
| 1345 | /* | 1345 | /* |
| 1346 | * NX is emulated; LMA and LME handled by hardware; SCE meaninless | 1346 | * NX is emulated; LMA and LME handled by hardware; SCE meaningless |
| 1347 | * outside long mode | 1347 | * outside long mode |
| 1348 | */ | 1348 | */ |
| 1349 | ignore_bits = EFER_NX | EFER_SCE; | 1349 | ignore_bits = EFER_NX | EFER_SCE; |
| @@ -3261,7 +3261,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
| 3261 | * qemu binaries. | 3261 | * qemu binaries. |
| 3262 | * IA32 arch specifies that at the time of processor reset the | 3262 | * IA32 arch specifies that at the time of processor reset the |
| 3263 | * "Accessed" bit in the AR field of segment registers is 1. And qemu | 3263 | * "Accessed" bit in the AR field of segment registers is 1. And qemu |
| 3264 | * is setting it to 0 in the usedland code. This causes invalid guest | 3264 | * is setting it to 0 in the userland code. This causes invalid guest |
| 3265 | * state vmexit when "unrestricted guest" mode is turned on. | 3265 | * state vmexit when "unrestricted guest" mode is turned on. |
| 3266 | * Fix for this setup issue in cpu_reset is being pushed in the qemu | 3266 | * Fix for this setup issue in cpu_reset is being pushed in the qemu |
| 3267 | * tree. Newer qemu binaries with that qemu fix would not need this | 3267 | * tree. Newer qemu binaries with that qemu fix would not need this |
| @@ -4446,7 +4446,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
| 4446 | hypercall[2] = 0xc1; | 4446 | hypercall[2] = 0xc1; |
| 4447 | } | 4447 | } |
| 4448 | 4448 | ||
| 4449 | /* called to set cr0 as approriate for a mov-to-cr0 exit. */ | 4449 | /* called to set cr0 as appropriate for a mov-to-cr0 exit. */ |
| 4450 | static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) | 4450 | static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) |
| 4451 | { | 4451 | { |
| 4452 | if (to_vmx(vcpu)->nested.vmxon && | 4452 | if (to_vmx(vcpu)->nested.vmxon && |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 59b59508ff07..3d9d08edbf29 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -1093,7 +1093,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | |||
| 1093 | * For each generation, we track the original measured | 1093 | * For each generation, we track the original measured |
| 1094 | * nanosecond time, offset, and write, so if TSCs are in | 1094 | * nanosecond time, offset, and write, so if TSCs are in |
| 1095 | * sync, we can match exact offset, and if not, we can match | 1095 | * sync, we can match exact offset, and if not, we can match |
| 1096 | * exact software computaion in compute_guest_tsc() | 1096 | * exact software computation in compute_guest_tsc() |
| 1097 | * | 1097 | * |
| 1098 | * These values are tracked in kvm->arch.cur_xxx variables. | 1098 | * These values are tracked in kvm->arch.cur_xxx variables. |
| 1099 | */ | 1099 | */ |
| @@ -1500,7 +1500,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) | |||
| 1500 | { | 1500 | { |
| 1501 | gpa_t gpa = data & ~0x3f; | 1501 | gpa_t gpa = data & ~0x3f; |
| 1502 | 1502 | ||
| 1503 | /* Bits 2:5 are resrved, Should be zero */ | 1503 | /* Bits 2:5 are reserved, Should be zero */ |
| 1504 | if (data & 0x3c) | 1504 | if (data & 0x3c) |
| 1505 | return 1; | 1505 | return 1; |
| 1506 | 1506 | ||
| @@ -1723,7 +1723,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
| 1723 | * Ignore all writes to this no longer documented MSR. | 1723 | * Ignore all writes to this no longer documented MSR. |
| 1724 | * Writes are only relevant for old K7 processors, | 1724 | * Writes are only relevant for old K7 processors, |
| 1725 | * all pre-dating SVM, but a recommended workaround from | 1725 | * all pre-dating SVM, but a recommended workaround from |
| 1726 | * AMD for these chips. It is possible to speicify the | 1726 | * AMD for these chips. It is possible to specify the |
| 1727 | * affected processor models on the command line, hence | 1727 | * affected processor models on the command line, hence |
| 1728 | * the need to ignore the workaround. | 1728 | * the need to ignore the workaround. |
| 1729 | */ | 1729 | */ |
| @@ -2632,7 +2632,6 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) | |||
| 2632 | if (!vcpu->arch.time_page) | 2632 | if (!vcpu->arch.time_page) |
| 2633 | return -EINVAL; | 2633 | return -EINVAL; |
| 2634 | src->flags |= PVCLOCK_GUEST_STOPPED; | 2634 | src->flags |= PVCLOCK_GUEST_STOPPED; |
| 2635 | mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT); | ||
| 2636 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | 2635 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
| 2637 | return 0; | 2636 | return 0; |
| 2638 | } | 2637 | } |
| @@ -4492,7 +4491,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) | |||
| 4492 | 4491 | ||
| 4493 | /* | 4492 | /* |
| 4494 | * if emulation was due to access to shadowed page table | 4493 | * if emulation was due to access to shadowed page table |
| 4495 | * and it failed try to unshadow page and re-entetr the | 4494 | * and it failed try to unshadow page and re-enter the |
| 4496 | * guest to let CPU execute the instruction. | 4495 | * guest to let CPU execute the instruction. |
| 4497 | */ | 4496 | */ |
| 4498 | if (kvm_mmu_unprotect_page_virt(vcpu, gva)) | 4497 | if (kvm_mmu_unprotect_page_virt(vcpu, gva)) |
| @@ -5588,7 +5587,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 5588 | /* | 5587 | /* |
| 5589 | * We are here if userspace calls get_regs() in the middle of | 5588 | * We are here if userspace calls get_regs() in the middle of |
| 5590 | * instruction emulation. Registers state needs to be copied | 5589 | * instruction emulation. Registers state needs to be copied |
| 5591 | * back from emulation context to vcpu. Usrapace shouldn't do | 5590 | * back from emulation context to vcpu. Userspace shouldn't do |
| 5592 | * that usually, but some bad designed PV devices (vmware | 5591 | * that usually, but some bad designed PV devices (vmware |
| 5593 | * backdoor interface) need this to work | 5592 | * backdoor interface) need this to work |
| 5594 | */ | 5593 | */ |
| @@ -6117,7 +6116,7 @@ int kvm_arch_hardware_enable(void *garbage) | |||
| 6117 | * as we reset last_host_tsc on all VCPUs to stop this from being | 6116 | * as we reset last_host_tsc on all VCPUs to stop this from being |
| 6118 | * called multiple times (one for each physical CPU bringup). | 6117 | * called multiple times (one for each physical CPU bringup). |
| 6119 | * | 6118 | * |
| 6120 | * Platforms with unnreliable TSCs don't have to deal with this, they | 6119 | * Platforms with unreliable TSCs don't have to deal with this, they |
| 6121 | * will be compensated by the logic in vcpu_load, which sets the TSC to | 6120 | * will be compensated by the logic in vcpu_load, which sets the TSC to |
| 6122 | * catchup mode. This will catchup all VCPUs to real time, but cannot | 6121 | * catchup mode. This will catchup all VCPUs to real time, but cannot |
| 6123 | * guarantee that they stay in perfect synchronization. | 6122 | * guarantee that they stay in perfect synchronization. |
| @@ -6314,6 +6313,10 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free, | |||
| 6314 | int i; | 6313 | int i; |
| 6315 | 6314 | ||
| 6316 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | 6315 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { |
| 6316 | if (!dont || free->arch.rmap_pde[i] != dont->arch.rmap_pde[i]) { | ||
| 6317 | kvm_kvfree(free->arch.rmap_pde[i]); | ||
| 6318 | free->arch.rmap_pde[i] = NULL; | ||
| 6319 | } | ||
| 6317 | if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) { | 6320 | if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) { |
| 6318 | kvm_kvfree(free->arch.lpage_info[i]); | 6321 | kvm_kvfree(free->arch.lpage_info[i]); |
| 6319 | free->arch.lpage_info[i] = NULL; | 6322 | free->arch.lpage_info[i] = NULL; |
| @@ -6333,6 +6336,11 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | |||
| 6333 | lpages = gfn_to_index(slot->base_gfn + npages - 1, | 6336 | lpages = gfn_to_index(slot->base_gfn + npages - 1, |
| 6334 | slot->base_gfn, level) + 1; | 6337 | slot->base_gfn, level) + 1; |
| 6335 | 6338 | ||
| 6339 | slot->arch.rmap_pde[i] = | ||
| 6340 | kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap_pde[i])); | ||
| 6341 | if (!slot->arch.rmap_pde[i]) | ||
| 6342 | goto out_free; | ||
| 6343 | |||
| 6336 | slot->arch.lpage_info[i] = | 6344 | slot->arch.lpage_info[i] = |
| 6337 | kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i])); | 6345 | kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i])); |
| 6338 | if (!slot->arch.lpage_info[i]) | 6346 | if (!slot->arch.lpage_info[i]) |
| @@ -6361,7 +6369,9 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) | |||
| 6361 | 6369 | ||
| 6362 | out_free: | 6370 | out_free: |
| 6363 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { | 6371 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { |
| 6372 | kvm_kvfree(slot->arch.rmap_pde[i]); | ||
| 6364 | kvm_kvfree(slot->arch.lpage_info[i]); | 6373 | kvm_kvfree(slot->arch.lpage_info[i]); |
| 6374 | slot->arch.rmap_pde[i] = NULL; | ||
| 6365 | slot->arch.lpage_info[i] = NULL; | 6375 | slot->arch.lpage_info[i] = NULL; |
| 6366 | } | 6376 | } |
| 6367 | return -ENOMEM; | 6377 | return -ENOMEM; |
| @@ -6381,7 +6391,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, | |||
| 6381 | map_flags = MAP_SHARED | MAP_ANONYMOUS; | 6391 | map_flags = MAP_SHARED | MAP_ANONYMOUS; |
| 6382 | 6392 | ||
| 6383 | /*To keep backward compatibility with older userspace, | 6393 | /*To keep backward compatibility with older userspace, |
| 6384 | *x86 needs to hanlde !user_alloc case. | 6394 | *x86 needs to handle !user_alloc case. |
| 6385 | */ | 6395 | */ |
| 6386 | if (!user_alloc) { | 6396 | if (!user_alloc) { |
| 6387 | if (npages && !old.rmap) { | 6397 | if (npages && !old.rmap) { |
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b70b48b01098..1993eb1cb2cd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
| @@ -183,6 +183,18 @@ struct kvm_vcpu { | |||
| 183 | } async_pf; | 183 | } async_pf; |
| 184 | #endif | 184 | #endif |
| 185 | 185 | ||
| 186 | #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT | ||
| 187 | /* | ||
| 188 | * Cpu relax intercept or pause loop exit optimization | ||
| 189 | * in_spin_loop: set when a vcpu does a pause loop exit | ||
| 190 | * or cpu relax intercepted. | ||
| 191 | * dy_eligible: indicates whether vcpu is eligible for directed yield. | ||
| 192 | */ | ||
| 193 | struct { | ||
| 194 | bool in_spin_loop; | ||
| 195 | bool dy_eligible; | ||
| 196 | } spin_loop; | ||
| 197 | #endif | ||
| 186 | struct kvm_vcpu_arch arch; | 198 | struct kvm_vcpu_arch arch; |
| 187 | }; | 199 | }; |
| 188 | 200 | ||
| @@ -378,20 +390,11 @@ id_to_memslot(struct kvm_memslots *slots, int id) | |||
| 378 | return slot; | 390 | return slot; |
| 379 | } | 391 | } |
| 380 | 392 | ||
| 381 | #define HPA_MSB ((sizeof(hpa_t) * 8) - 1) | ||
| 382 | #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB) | ||
| 383 | static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } | ||
| 384 | |||
| 385 | extern struct page *bad_page; | 393 | extern struct page *bad_page; |
| 386 | extern struct page *fault_page; | ||
| 387 | |||
| 388 | extern pfn_t bad_pfn; | ||
| 389 | extern pfn_t fault_pfn; | ||
| 390 | 394 | ||
| 391 | int is_error_page(struct page *page); | 395 | int is_error_page(struct page *page); |
| 392 | int is_error_pfn(pfn_t pfn); | 396 | int is_error_pfn(pfn_t pfn); |
| 393 | int is_hwpoison_pfn(pfn_t pfn); | 397 | int is_hwpoison_pfn(pfn_t pfn); |
| 394 | int is_fault_pfn(pfn_t pfn); | ||
| 395 | int is_noslot_pfn(pfn_t pfn); | 398 | int is_noslot_pfn(pfn_t pfn); |
| 396 | int is_invalid_pfn(pfn_t pfn); | 399 | int is_invalid_pfn(pfn_t pfn); |
| 397 | int kvm_is_error_hva(unsigned long addr); | 400 | int kvm_is_error_hva(unsigned long addr); |
| @@ -427,20 +430,20 @@ void kvm_release_page_dirty(struct page *page); | |||
| 427 | void kvm_set_page_dirty(struct page *page); | 430 | void kvm_set_page_dirty(struct page *page); |
| 428 | void kvm_set_page_accessed(struct page *page); | 431 | void kvm_set_page_accessed(struct page *page); |
| 429 | 432 | ||
| 430 | pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr); | 433 | pfn_t hva_to_pfn_atomic(unsigned long addr); |
| 431 | pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); | 434 | pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); |
| 432 | pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async, | 435 | pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async, |
| 433 | bool write_fault, bool *writable); | 436 | bool write_fault, bool *writable); |
| 434 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); | 437 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); |
| 435 | pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, | 438 | pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, |
| 436 | bool *writable); | 439 | bool *writable); |
| 437 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, | 440 | pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn); |
| 438 | struct kvm_memory_slot *slot, gfn_t gfn); | ||
| 439 | void kvm_release_pfn_dirty(pfn_t); | 441 | void kvm_release_pfn_dirty(pfn_t); |
| 440 | void kvm_release_pfn_clean(pfn_t pfn); | 442 | void kvm_release_pfn_clean(pfn_t pfn); |
| 441 | void kvm_set_pfn_dirty(pfn_t pfn); | 443 | void kvm_set_pfn_dirty(pfn_t pfn); |
| 442 | void kvm_set_pfn_accessed(pfn_t pfn); | 444 | void kvm_set_pfn_accessed(pfn_t pfn); |
| 443 | void kvm_get_pfn(pfn_t pfn); | 445 | void kvm_get_pfn(pfn_t pfn); |
| 446 | pfn_t get_fault_pfn(void); | ||
| 444 | 447 | ||
| 445 | int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, | 448 | int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, |
| 446 | int len); | 449 | int len); |
| @@ -740,6 +743,14 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) | |||
| 740 | (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); | 743 | (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); |
| 741 | } | 744 | } |
| 742 | 745 | ||
| 746 | static inline gfn_t | ||
| 747 | hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot) | ||
| 748 | { | ||
| 749 | gfn_t gfn_offset = (hva - slot->userspace_addr) >> PAGE_SHIFT; | ||
| 750 | |||
| 751 | return slot->base_gfn + gfn_offset; | ||
| 752 | } | ||
| 753 | |||
| 743 | static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, | 754 | static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, |
| 744 | gfn_t gfn) | 755 | gfn_t gfn) |
| 745 | { | 756 | { |
| @@ -899,5 +910,32 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) | |||
| 899 | } | 910 | } |
| 900 | } | 911 | } |
| 901 | 912 | ||
| 913 | #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT | ||
| 914 | |||
| 915 | static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) | ||
| 916 | { | ||
| 917 | vcpu->spin_loop.in_spin_loop = val; | ||
| 918 | } | ||
| 919 | static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) | ||
| 920 | { | ||
| 921 | vcpu->spin_loop.dy_eligible = val; | ||
| 922 | } | ||
| 923 | |||
| 924 | #else /* !CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ | ||
| 925 | |||
| 926 | static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) | ||
| 927 | { | ||
| 928 | } | ||
| 929 | |||
| 930 | static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) | ||
| 931 | { | ||
| 932 | } | ||
| 933 | |||
| 934 | static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) | ||
| 935 | { | ||
| 936 | return true; | ||
| 937 | } | ||
| 938 | |||
| 939 | #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ | ||
| 902 | #endif | 940 | #endif |
| 903 | 941 | ||
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 28694f4a9139..d01b24b72c61 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig | |||
| @@ -21,3 +21,6 @@ config KVM_ASYNC_PF | |||
| 21 | 21 | ||
| 22 | config HAVE_KVM_MSI | 22 | config HAVE_KVM_MSI |
| 23 | bool | 23 | bool |
| 24 | |||
| 25 | config HAVE_KVM_CPU_RELAX_INTERCEPT | ||
| 26 | bool | ||
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index e9fff9830bf0..c03f1fb26701 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c | |||
| @@ -42,13 +42,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm); | |||
| 42 | static void kvm_iommu_put_pages(struct kvm *kvm, | 42 | static void kvm_iommu_put_pages(struct kvm *kvm, |
| 43 | gfn_t base_gfn, unsigned long npages); | 43 | gfn_t base_gfn, unsigned long npages); |
| 44 | 44 | ||
| 45 | static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot, | 45 | static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, |
| 46 | gfn_t gfn, unsigned long size) | 46 | unsigned long size) |
| 47 | { | 47 | { |
| 48 | gfn_t end_gfn; | 48 | gfn_t end_gfn; |
| 49 | pfn_t pfn; | 49 | pfn_t pfn; |
| 50 | 50 | ||
| 51 | pfn = gfn_to_pfn_memslot(kvm, slot, gfn); | 51 | pfn = gfn_to_pfn_memslot(slot, gfn); |
| 52 | end_gfn = gfn + (size >> PAGE_SHIFT); | 52 | end_gfn = gfn + (size >> PAGE_SHIFT); |
| 53 | gfn += 1; | 53 | gfn += 1; |
| 54 | 54 | ||
| @@ -56,7 +56,7 @@ static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot, | |||
| 56 | return pfn; | 56 | return pfn; |
| 57 | 57 | ||
| 58 | while (gfn < end_gfn) | 58 | while (gfn < end_gfn) |
| 59 | gfn_to_pfn_memslot(kvm, slot, gfn++); | 59 | gfn_to_pfn_memslot(slot, gfn++); |
| 60 | 60 | ||
| 61 | return pfn; | 61 | return pfn; |
| 62 | } | 62 | } |
| @@ -105,7 +105,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) | |||
| 105 | * Pin all pages we are about to map in memory. This is | 105 | * Pin all pages we are about to map in memory. This is |
| 106 | * important because we unmap and unpin in 4kb steps later. | 106 | * important because we unmap and unpin in 4kb steps later. |
| 107 | */ | 107 | */ |
| 108 | pfn = kvm_pin_pages(kvm, slot, gfn, page_size); | 108 | pfn = kvm_pin_pages(slot, gfn, page_size); |
| 109 | if (is_error_pfn(pfn)) { | 109 | if (is_error_pfn(pfn)) { |
| 110 | gfn += 1; | 110 | gfn += 1; |
| 111 | continue; | 111 | continue; |
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 83402d74a767..7118be0f2f2c 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c | |||
| @@ -321,11 +321,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, | |||
| 321 | switch (ue->u.irqchip.irqchip) { | 321 | switch (ue->u.irqchip.irqchip) { |
| 322 | case KVM_IRQCHIP_PIC_MASTER: | 322 | case KVM_IRQCHIP_PIC_MASTER: |
| 323 | e->set = kvm_set_pic_irq; | 323 | e->set = kvm_set_pic_irq; |
| 324 | max_pin = 16; | 324 | max_pin = PIC_NUM_PINS; |
| 325 | break; | 325 | break; |
| 326 | case KVM_IRQCHIP_PIC_SLAVE: | 326 | case KVM_IRQCHIP_PIC_SLAVE: |
| 327 | e->set = kvm_set_pic_irq; | 327 | e->set = kvm_set_pic_irq; |
| 328 | max_pin = 16; | 328 | max_pin = PIC_NUM_PINS; |
| 329 | delta = 8; | 329 | delta = 8; |
| 330 | break; | 330 | break; |
| 331 | case KVM_IRQCHIP_IOAPIC: | 331 | case KVM_IRQCHIP_IOAPIC: |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 246852397e30..0014ee99dc7f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
| @@ -100,11 +100,14 @@ EXPORT_SYMBOL_GPL(kvm_rebooting); | |||
| 100 | 100 | ||
| 101 | static bool largepages_enabled = true; | 101 | static bool largepages_enabled = true; |
| 102 | 102 | ||
| 103 | struct page *bad_page; | ||
| 104 | static pfn_t bad_pfn; | ||
| 105 | |||
| 103 | static struct page *hwpoison_page; | 106 | static struct page *hwpoison_page; |
| 104 | static pfn_t hwpoison_pfn; | 107 | static pfn_t hwpoison_pfn; |
| 105 | 108 | ||
| 106 | struct page *fault_page; | 109 | static struct page *fault_page; |
| 107 | pfn_t fault_pfn; | 110 | static pfn_t fault_pfn; |
| 108 | 111 | ||
| 109 | inline int kvm_is_mmio_pfn(pfn_t pfn) | 112 | inline int kvm_is_mmio_pfn(pfn_t pfn) |
| 110 | { | 113 | { |
| @@ -236,6 +239,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) | |||
| 236 | } | 239 | } |
| 237 | vcpu->run = page_address(page); | 240 | vcpu->run = page_address(page); |
| 238 | 241 | ||
| 242 | kvm_vcpu_set_in_spin_loop(vcpu, false); | ||
| 243 | kvm_vcpu_set_dy_eligible(vcpu, false); | ||
| 244 | |||
| 239 | r = kvm_arch_vcpu_init(vcpu); | 245 | r = kvm_arch_vcpu_init(vcpu); |
| 240 | if (r < 0) | 246 | if (r < 0) |
| 241 | goto fail_free_run; | 247 | goto fail_free_run; |
| @@ -332,8 +338,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, | |||
| 332 | * count is also read inside the mmu_lock critical section. | 338 | * count is also read inside the mmu_lock critical section. |
| 333 | */ | 339 | */ |
| 334 | kvm->mmu_notifier_count++; | 340 | kvm->mmu_notifier_count++; |
| 335 | for (; start < end; start += PAGE_SIZE) | 341 | need_tlb_flush = kvm_unmap_hva_range(kvm, start, end); |
| 336 | need_tlb_flush |= kvm_unmap_hva(kvm, start); | ||
| 337 | need_tlb_flush |= kvm->tlbs_dirty; | 342 | need_tlb_flush |= kvm->tlbs_dirty; |
| 338 | /* we've to flush the tlb before the pages can be freed */ | 343 | /* we've to flush the tlb before the pages can be freed */ |
| 339 | if (need_tlb_flush) | 344 | if (need_tlb_flush) |
| @@ -950,12 +955,6 @@ int is_hwpoison_pfn(pfn_t pfn) | |||
| 950 | } | 955 | } |
| 951 | EXPORT_SYMBOL_GPL(is_hwpoison_pfn); | 956 | EXPORT_SYMBOL_GPL(is_hwpoison_pfn); |
| 952 | 957 | ||
| 953 | int is_fault_pfn(pfn_t pfn) | ||
| 954 | { | ||
| 955 | return pfn == fault_pfn; | ||
| 956 | } | ||
| 957 | EXPORT_SYMBOL_GPL(is_fault_pfn); | ||
| 958 | |||
| 959 | int is_noslot_pfn(pfn_t pfn) | 958 | int is_noslot_pfn(pfn_t pfn) |
| 960 | { | 959 | { |
| 961 | return pfn == bad_pfn; | 960 | return pfn == bad_pfn; |
| @@ -1039,11 +1038,12 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | |||
| 1039 | } | 1038 | } |
| 1040 | EXPORT_SYMBOL_GPL(gfn_to_hva); | 1039 | EXPORT_SYMBOL_GPL(gfn_to_hva); |
| 1041 | 1040 | ||
| 1042 | static pfn_t get_fault_pfn(void) | 1041 | pfn_t get_fault_pfn(void) |
| 1043 | { | 1042 | { |
| 1044 | get_page(fault_page); | 1043 | get_page(fault_page); |
| 1045 | return fault_pfn; | 1044 | return fault_pfn; |
| 1046 | } | 1045 | } |
| 1046 | EXPORT_SYMBOL_GPL(get_fault_pfn); | ||
| 1047 | 1047 | ||
| 1048 | int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, | 1048 | int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, |
| 1049 | unsigned long start, int write, struct page **page) | 1049 | unsigned long start, int write, struct page **page) |
| @@ -1065,8 +1065,8 @@ static inline int check_user_page_hwpoison(unsigned long addr) | |||
| 1065 | return rc == -EHWPOISON; | 1065 | return rc == -EHWPOISON; |
| 1066 | } | 1066 | } |
| 1067 | 1067 | ||
| 1068 | static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, | 1068 | static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async, |
| 1069 | bool *async, bool write_fault, bool *writable) | 1069 | bool write_fault, bool *writable) |
| 1070 | { | 1070 | { |
| 1071 | struct page *page[1]; | 1071 | struct page *page[1]; |
| 1072 | int npages = 0; | 1072 | int npages = 0; |
| @@ -1146,9 +1146,9 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic, | |||
| 1146 | return pfn; | 1146 | return pfn; |
| 1147 | } | 1147 | } |
| 1148 | 1148 | ||
| 1149 | pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr) | 1149 | pfn_t hva_to_pfn_atomic(unsigned long addr) |
| 1150 | { | 1150 | { |
| 1151 | return hva_to_pfn(kvm, addr, true, NULL, true, NULL); | 1151 | return hva_to_pfn(addr, true, NULL, true, NULL); |
| 1152 | } | 1152 | } |
| 1153 | EXPORT_SYMBOL_GPL(hva_to_pfn_atomic); | 1153 | EXPORT_SYMBOL_GPL(hva_to_pfn_atomic); |
| 1154 | 1154 | ||
| @@ -1166,7 +1166,7 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, | |||
| 1166 | return page_to_pfn(bad_page); | 1166 | return page_to_pfn(bad_page); |
| 1167 | } | 1167 | } |
| 1168 | 1168 | ||
| 1169 | return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable); | 1169 | return hva_to_pfn(addr, atomic, async, write_fault, writable); |
| 1170 | } | 1170 | } |
| 1171 | 1171 | ||
| 1172 | pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) | 1172 | pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) |
| @@ -1195,11 +1195,10 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, | |||
| 1195 | } | 1195 | } |
| 1196 | EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); | 1196 | EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); |
| 1197 | 1197 | ||
| 1198 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, | 1198 | pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) |
| 1199 | struct kvm_memory_slot *slot, gfn_t gfn) | ||
| 1200 | { | 1199 | { |
| 1201 | unsigned long addr = gfn_to_hva_memslot(slot, gfn); | 1200 | unsigned long addr = gfn_to_hva_memslot(slot, gfn); |
| 1202 | return hva_to_pfn(kvm, addr, false, NULL, true, NULL); | 1201 | return hva_to_pfn(addr, false, NULL, true, NULL); |
| 1203 | } | 1202 | } |
| 1204 | 1203 | ||
| 1205 | int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, | 1204 | int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages, |
| @@ -1580,6 +1579,43 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target) | |||
| 1580 | } | 1579 | } |
| 1581 | EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); | 1580 | EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); |
| 1582 | 1581 | ||
| 1582 | #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT | ||
| 1583 | /* | ||
| 1584 | * Helper that checks whether a VCPU is eligible for directed yield. | ||
| 1585 | * Most eligible candidate to yield is decided by following heuristics: | ||
| 1586 | * | ||
| 1587 | * (a) VCPU which has not done pl-exit or cpu relax intercepted recently | ||
| 1588 | * (preempted lock holder), indicated by @in_spin_loop. | ||
| 1589 | * Set at the beiginning and cleared at the end of interception/PLE handler. | ||
| 1590 | * | ||
| 1591 | * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get | ||
| 1592 | * chance last time (mostly it has become eligible now since we have probably | ||
| 1593 | * yielded to lockholder in last iteration. This is done by toggling | ||
| 1594 | * @dy_eligible each time a VCPU checked for eligibility.) | ||
| 1595 | * | ||
| 1596 | * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding | ||
| 1597 | * to preempted lock-holder could result in wrong VCPU selection and CPU | ||
| 1598 | * burning. Giving priority for a potential lock-holder increases lock | ||
| 1599 | * progress. | ||
| 1600 | * | ||
| 1601 | * Since algorithm is based on heuristics, accessing another VCPU data without | ||
| 1602 | * locking does not harm. It may result in trying to yield to same VCPU, fail | ||
| 1603 | * and continue with next VCPU and so on. | ||
| 1604 | */ | ||
| 1605 | bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) | ||
| 1606 | { | ||
| 1607 | bool eligible; | ||
| 1608 | |||
| 1609 | eligible = !vcpu->spin_loop.in_spin_loop || | ||
| 1610 | (vcpu->spin_loop.in_spin_loop && | ||
| 1611 | vcpu->spin_loop.dy_eligible); | ||
| 1612 | |||
| 1613 | if (vcpu->spin_loop.in_spin_loop) | ||
| 1614 | kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); | ||
| 1615 | |||
| 1616 | return eligible; | ||
| 1617 | } | ||
| 1618 | #endif | ||
| 1583 | void kvm_vcpu_on_spin(struct kvm_vcpu *me) | 1619 | void kvm_vcpu_on_spin(struct kvm_vcpu *me) |
| 1584 | { | 1620 | { |
| 1585 | struct kvm *kvm = me->kvm; | 1621 | struct kvm *kvm = me->kvm; |
| @@ -1589,6 +1625,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) | |||
| 1589 | int pass; | 1625 | int pass; |
| 1590 | int i; | 1626 | int i; |
| 1591 | 1627 | ||
| 1628 | kvm_vcpu_set_in_spin_loop(me, true); | ||
| 1592 | /* | 1629 | /* |
| 1593 | * We boost the priority of a VCPU that is runnable but not | 1630 | * We boost the priority of a VCPU that is runnable but not |
| 1594 | * currently running, because it got preempted by something | 1631 | * currently running, because it got preempted by something |
| @@ -1607,6 +1644,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) | |||
| 1607 | continue; | 1644 | continue; |
| 1608 | if (waitqueue_active(&vcpu->wq)) | 1645 | if (waitqueue_active(&vcpu->wq)) |
| 1609 | continue; | 1646 | continue; |
| 1647 | if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) | ||
| 1648 | continue; | ||
| 1610 | if (kvm_vcpu_yield_to(vcpu)) { | 1649 | if (kvm_vcpu_yield_to(vcpu)) { |
| 1611 | kvm->last_boosted_vcpu = i; | 1650 | kvm->last_boosted_vcpu = i; |
| 1612 | yielded = 1; | 1651 | yielded = 1; |
| @@ -1614,6 +1653,10 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) | |||
| 1614 | } | 1653 | } |
| 1615 | } | 1654 | } |
| 1616 | } | 1655 | } |
| 1656 | kvm_vcpu_set_in_spin_loop(me, false); | ||
| 1657 | |||
| 1658 | /* Ensure vcpu is not eligible during next spinloop */ | ||
| 1659 | kvm_vcpu_set_dy_eligible(me, false); | ||
| 1617 | } | 1660 | } |
| 1618 | EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); | 1661 | EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); |
| 1619 | 1662 | ||
| @@ -2697,9 +2740,6 @@ static struct syscore_ops kvm_syscore_ops = { | |||
| 2697 | .resume = kvm_resume, | 2740 | .resume = kvm_resume, |
| 2698 | }; | 2741 | }; |
| 2699 | 2742 | ||
| 2700 | struct page *bad_page; | ||
| 2701 | pfn_t bad_pfn; | ||
| 2702 | |||
| 2703 | static inline | 2743 | static inline |
| 2704 | struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) | 2744 | struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) |
| 2705 | { | 2745 | { |
