author     Avi Kivity <avi@redhat.com>  2012-07-26 04:54:21 -0400
committer  Avi Kivity <avi@redhat.com>  2012-07-26 04:54:21 -0400
commit     e9bda6f6f902e6b55d9baceb5523468a048cbe56 (patch)
tree       bf09cc165da1197cd34967da0593d08b9a37c0f3
parent     bdc0077af574800d24318b6945cf2344e8dbb050 (diff)
parent     06e48c510aa37f6e791602e6420422ea7071fe94 (diff)
Merge branch 'queue' into next
Merge patches queued during the run-up to the merge window.
* queue: (25 commits)
KVM: Choose better candidate for directed yield
KVM: Note down when cpu relax intercepted or pause loop exited
KVM: Add config to support ple or cpu relax optimzation
KVM: switch to symbolic name for irq_states size
KVM: x86: Fix typos in pmu.c
KVM: x86: Fix typos in lapic.c
KVM: x86: Fix typos in cpuid.c
KVM: x86: Fix typos in emulate.c
KVM: x86: Fix typos in x86.c
KVM: SVM: Fix typos
KVM: VMX: Fix typos
KVM: remove the unused parameter of gfn_to_pfn_memslot
KVM: remove is_error_hpa
KVM: make bad_pfn static to kvm_main.c
KVM: using get_fault_pfn to get the fault pfn
KVM: MMU: track the refcount when unmap the page
KVM: x86: remove unnecessary mark_page_dirty
KVM: MMU: Avoid handling same rmap_pde in kvm_handle_hva_range()
KVM: MMU: Push trace_kvm_age_page() into kvm_age_rmapp()
KVM: MMU: Add memslot parameter to hva handlers
...
Signed-off-by: Avi Kivity <avi@redhat.com>
-rw-r--r--   arch/powerpc/include/asm/kvm_host.h  |   2
-rw-r--r--   arch/powerpc/kvm/book3s_64_mmu_hv.c  |  47
-rw-r--r--   arch/powerpc/kvm/e500_tlb.c          |   2
-rw-r--r--   arch/s390/kvm/Kconfig                |   1
-rw-r--r--   arch/x86/include/asm/kvm_host.h      |   3
-rw-r--r--   arch/x86/kvm/Kconfig                 |   1
-rw-r--r--   arch/x86/kvm/cpuid.c                 |   2
-rw-r--r--   arch/x86/kvm/emulate.c               |  10
-rw-r--r--   arch/x86/kvm/irq.h                   |   2
-rw-r--r--   arch/x86/kvm/lapic.c                 |   4
-rw-r--r--   arch/x86/kvm/mmu.c                   | 117
-rw-r--r--   arch/x86/kvm/pmu.c                   |   2
-rw-r--r--   arch/x86/kvm/svm.c                   |   4
-rw-r--r--   arch/x86/kvm/vmx.c                   |   6
-rw-r--r--   arch/x86/kvm/x86.c                   |  26
-rw-r--r--   include/linux/kvm_host.h             |  62
-rw-r--r--   virt/kvm/Kconfig                     |   3
-rw-r--r--   virt/kvm/iommu.c                     |  10
-rw-r--r--   virt/kvm/irq_comm.c                  |   4
-rw-r--r--   virt/kvm/kvm_main.c                  |  84
20 files changed, 277 insertions, 115 deletions
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 50ea12fd7bf5..572ad0141268 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -52,6 +52,8 @@
 
 struct kvm;
 extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_unmap_hva_range(struct kvm *kvm,
+                               unsigned long start, unsigned long end);
 extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d03eb6f7b058..3c635c0616b0 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -756,9 +756,12 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
         goto out_put;
 }
 
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
-                          int (*handler)(struct kvm *kvm, unsigned long *rmapp,
-                                         unsigned long gfn))
+static int kvm_handle_hva_range(struct kvm *kvm,
+                                unsigned long start,
+                                unsigned long end,
+                                int (*handler)(struct kvm *kvm,
+                                               unsigned long *rmapp,
+                                               unsigned long gfn))
 {
         int ret;
         int retval = 0;
@@ -767,15 +770,25 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 
         slots = kvm_memslots(kvm);
         kvm_for_each_memslot(memslot, slots) {
-                unsigned long start = memslot->userspace_addr;
-                unsigned long end;
+                unsigned long hva_start, hva_end;
+                gfn_t gfn, gfn_end;
 
-                end = start + (memslot->npages << PAGE_SHIFT);
-                if (hva >= start && hva < end) {
-                        gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+                hva_start = max(start, memslot->userspace_addr);
+                hva_end = min(end, memslot->userspace_addr +
+                                   (memslot->npages << PAGE_SHIFT));
+                if (hva_start >= hva_end)
+                        continue;
+                /*
+                 * {gfn(page) | page intersects with [hva_start, hva_end)} =
+                 * {gfn, gfn+1, ..., gfn_end-1}.
+                 */
+                gfn = hva_to_gfn_memslot(hva_start, memslot);
+                gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+
+                for (; gfn < gfn_end; ++gfn) {
+                        gfn_t gfn_offset = gfn - memslot->base_gfn;
 
-                        ret = handler(kvm, &memslot->rmap[gfn_offset],
-                                      memslot->base_gfn + gfn_offset);
+                        ret = handler(kvm, &memslot->rmap[gfn_offset], gfn);
                         retval |= ret;
                 }
         }
@@ -783,6 +796,13 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
         return retval;
 }
 
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+                          int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+                                         unsigned long gfn))
+{
+        return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
+}
+
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
                            unsigned long gfn)
 {
@@ -850,6 +870,13 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
         return 0;
 }
 
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+        if (kvm->arch.using_mmu_notifiers)
+                kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
+        return 0;
+}
+
 static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
                          unsigned long gfn)
 {
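[Editor's note] The clipping-and-rounding arithmetic in kvm_handle_hva_range() above is the heart of this change: the notifier's [start, end) range is intersected with each memslot's hva range, and hva_end is rounded up by PAGE_SIZE - 1 so that a partially covered last page still gets its gfn handled. A minimal userspace sketch of that arithmetic (illustrative struct and values, not kernel code):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Illustrative stand-in for struct kvm_memory_slot. */
struct memslot {
        unsigned long userspace_addr;   /* hva where the slot is mapped */
        unsigned long base_gfn;         /* first guest frame of the slot */
        unsigned long npages;
};

static unsigned long max_ul(unsigned long a, unsigned long b) { return a > b ? a : b; }
static unsigned long min_ul(unsigned long a, unsigned long b) { return a < b ? a : b; }

/* Same arithmetic as the hva_to_gfn_memslot() helper added by this merge. */
static unsigned long hva_to_gfn(unsigned long hva, const struct memslot *s)
{
        return s->base_gfn + ((hva - s->userspace_addr) >> PAGE_SHIFT);
}

int main(void)
{
        /* Slot: 16 pages mapped at hva 0x40000000, guest frames 0x100.. */
        struct memslot s = { 0x40000000UL, 0x100, 16 };
        /* A notifier range that only partially overlaps the slot. */
        unsigned long start = 0x40000000UL + 5 * PAGE_SIZE + 123;
        unsigned long end   = 0x40000000UL + 9 * PAGE_SIZE + 1;

        unsigned long hva_start = max_ul(start, s.userspace_addr);
        unsigned long hva_end   = min_ul(end, s.userspace_addr +
                                              (s.npages << PAGE_SHIFT));
        if (hva_start >= hva_end)
                return 0;       /* no intersection with this slot */

        /* Round hva_end up so a partially covered last page is included. */
        unsigned long gfn     = hva_to_gfn(hva_start, &s);
        unsigned long gfn_end = hva_to_gfn(hva_end + PAGE_SIZE - 1, &s);

        printf("gfn range: 0x%lx .. 0x%lx\n", gfn, gfn_end - 1);
        /* prints: gfn range: 0x105 .. 0x109 */
        return 0;
}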
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index c510fc961302..c8f6c5826742 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -520,7 +520,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 
         if (likely(!pfnmap)) {
                 unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
-                pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn);
+                pfn = gfn_to_pfn_memslot(slot, gfn);
                 if (is_error_pfn(pfn)) {
                         printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
                                (long)gfn);
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 78eb9847008f..a6e2677724e1 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
         depends on HAVE_KVM && EXPERIMENTAL
         select PREEMPT_NOTIFIERS
         select ANON_INODES
+        select HAVE_KVM_CPU_RELAX_INTERCEPT
         ---help---
           Support hosting paravirtualized guest machines using the SIE
           virtualization capability on the mainframe. This should work
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 09155d64cf7e..48e713188469 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -500,11 +500,11 @@ struct kvm_vcpu_arch {
 };
 
 struct kvm_lpage_info {
-        unsigned long rmap_pde;
         int write_count;
 };
 
 struct kvm_arch_memory_slot {
+        unsigned long *rmap_pde[KVM_NR_PAGE_SIZES - 1];
         struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 };
 
@@ -957,6 +957,7 @@ extern bool kvm_rebooting;
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a28f338843ea..45c044f0fff7 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -37,6 +37,7 @@ config KVM
         select TASK_DELAY_ACCT
         select PERF_EVENTS
         select HAVE_KVM_MSI
+        select HAVE_KVM_CPU_RELAX_INTERCEPT
         ---help---
           Support hosting fully virtualized guest machines using hardware
           virtualization extensions. You will need a fairly recent
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0595f1397b7c..b496da684bd6 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -316,7 +316,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
         }
         case 7: {
                 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-                /* Mask ebx against host capbability word 9 */
+                /* Mask ebx against host capability word 9 */
                 if (index == 0) {
                         entry->ebx &= kvm_supported_word9_x86_features;
                         cpuid_mask(&entry->ebx, 9);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 97d9a9914ba8..85b611e13e84 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -642,7 +642,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
                 if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
                         goto bad;
         } else {
-                /* exapand-down segment */
+                /* expand-down segment */
                 if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim)
                         goto bad;
                 lim = desc.d ? 0xffffffff : 0xffff;
@@ -1383,7 +1383,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
         err_code = selector & 0xfffc;
         err_vec = GP_VECTOR;
 
-        /* can't load system descriptor into segment selecor */
+        /* can't load system descriptor into segment selector */
         if (seg <= VCPU_SREG_GS && !seg_desc.s)
                 goto exception;
 
@@ -2398,7 +2398,7 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
         set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
 
         /*
-         * Now load segment descriptors. If fault happenes at this stage
+         * Now load segment descriptors. If fault happens at this stage
          * it is handled in a context of new task
          */
         ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR);
@@ -2640,7 +2640,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
          *
          * 1. jmp/call/int to task gate: Check against DPL of the task gate
          * 2. Exception/IRQ/iret: No check is performed
-         * 3. jmp/call to TSS: Check agains DPL of the TSS
+         * 3. jmp/call to TSS: Check against DPL of the TSS
          */
         if (reason == TASK_SWITCH_GATE) {
                 if (idt_index != -1) {
@@ -2681,7 +2681,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
         ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
 
         /* set back link to prev task only if NT bit is set in eflags
-           note that old_tss_sel is not used afetr this point */
+           note that old_tss_sel is not used after this point */
         if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
                 old_tss_sel = 0xffff;
 
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2086f2bfba33..2d03568e9498 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -70,7 +70,7 @@ struct kvm_pic {
         struct kvm_io_device dev_slave;
         struct kvm_io_device dev_eclr;
         void (*ack_notifier)(void *opaque, int irq);
-        unsigned long irq_states[16];
+        unsigned long irq_states[PIC_NUM_PINS];
 };
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ce878788a39f..fff7173f6a71 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -719,7 +719,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
 {
         unsigned char alignment = offset & 0xf;
         u32 result;
-        /* this bitmask has a bit cleared for each reserver register */
+        /* this bitmask has a bit cleared for each reserved register */
         static const u64 rmask = 0x43ff01ffffffe70cULL;
 
         if ((alignment + len) > 4) {
@@ -792,7 +792,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
         atomic_set(&apic->lapic_timer.pending, 0);
 
         if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
-                /* lapic timer in oneshot or peroidic mode */
+                /* lapic timer in oneshot or periodic mode */
                 now = apic->lapic_timer.timer.base->get_time();
                 apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT)
                             * APIC_BUS_CYCLE_NS * apic->divide_count;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 01ca00423938..241993443599 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -556,6 +556,14 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
                 return 0;
 
         pfn = spte_to_pfn(old_spte);
+
+        /*
+         * KVM does not hold the refcount of the page used by
+         * kvm mmu, before reclaiming the page, we should
+         * unmap it from mmu first.
+         */
+        WARN_ON(!kvm_is_mmio_pfn(pfn) && !page_count(pfn_to_page(pfn)));
+
         if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
                 kvm_set_pfn_accessed(pfn);
         if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask))
@@ -960,13 +968,13 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
 static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
                                     struct kvm_memory_slot *slot)
 {
-        struct kvm_lpage_info *linfo;
+        unsigned long idx;
 
         if (likely(level == PT_PAGE_TABLE_LEVEL))
                 return &slot->rmap[gfn - slot->base_gfn];
 
-        linfo = lpage_info_slot(gfn, slot, level);
-        return &linfo->rmap_pde;
+        idx = gfn_to_index(gfn, slot->base_gfn, level);
+        return &slot->arch.rmap_pde[level - PT_DIRECTORY_LEVEL][idx];
 }
 
 /*
@@ -1200,7 +1208,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
 }
 
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
-                           unsigned long data)
+                           struct kvm_memory_slot *slot, unsigned long data)
 {
         u64 *sptep;
         struct rmap_iterator iter;
@@ -1218,7 +1226,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 }
 
 static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
-                             unsigned long data)
+                             struct kvm_memory_slot *slot, unsigned long data)
 {
         u64 *sptep;
         struct rmap_iterator iter;
@@ -1259,43 +1267,67 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
         return 0;
 }
 
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
-                          unsigned long data,
-                          int (*handler)(struct kvm *kvm, unsigned long *rmapp,
-                                         unsigned long data))
+static int kvm_handle_hva_range(struct kvm *kvm,
+                                unsigned long start,
+                                unsigned long end,
+                                unsigned long data,
+                                int (*handler)(struct kvm *kvm,
+                                               unsigned long *rmapp,
+                                               struct kvm_memory_slot *slot,
+                                               unsigned long data))
 {
         int j;
-        int ret;
-        int retval = 0;
+        int ret = 0;
         struct kvm_memslots *slots;
         struct kvm_memory_slot *memslot;
 
         slots = kvm_memslots(kvm);
 
         kvm_for_each_memslot(memslot, slots) {
-                unsigned long start = memslot->userspace_addr;
-                unsigned long end;
+                unsigned long hva_start, hva_end;
+                gfn_t gfn_start, gfn_end;
 
-                end = start + (memslot->npages << PAGE_SHIFT);
-                if (hva >= start && hva < end) {
-                        gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
-                        gfn_t gfn = memslot->base_gfn + gfn_offset;
+                hva_start = max(start, memslot->userspace_addr);
+                hva_end = min(end, memslot->userspace_addr +
+                                   (memslot->npages << PAGE_SHIFT));
+                if (hva_start >= hva_end)
+                        continue;
+                /*
+                 * {gfn(page) | page intersects with [hva_start, hva_end)} =
+                 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
+                 */
+                gfn_start = hva_to_gfn_memslot(hva_start, memslot);
+                gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
 
-                        ret = handler(kvm, &memslot->rmap[gfn_offset], data);
+                for (j = PT_PAGE_TABLE_LEVEL;
+                     j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
+                        unsigned long idx, idx_end;
+                        unsigned long *rmapp;
 
-                        for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
-                                struct kvm_lpage_info *linfo;
+                        /*
+                         * {idx(page_j) | page_j intersects with
+                         *  [hva_start, hva_end)} = {idx, idx+1, ..., idx_end}.
+                         */
+                        idx = gfn_to_index(gfn_start, memslot->base_gfn, j);
+                        idx_end = gfn_to_index(gfn_end - 1, memslot->base_gfn, j);
 
-                                linfo = lpage_info_slot(gfn, memslot,
-                                                PT_DIRECTORY_LEVEL + j);
-                                ret |= handler(kvm, &linfo->rmap_pde, data);
-                        }
-                        trace_kvm_age_page(hva, memslot, ret);
-                        retval |= ret;
+                        rmapp = __gfn_to_rmap(gfn_start, j, memslot);
+
+                        for (; idx <= idx_end; ++idx)
+                                ret |= handler(kvm, rmapp++, memslot, data);
                 }
         }
 
-        return retval;
+        return ret;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+                          unsigned long data,
+                          int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+                                         struct kvm_memory_slot *slot,
+                                         unsigned long data))
+{
+        return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
 }
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
@@ -1303,13 +1335,18 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
         return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
 }
 
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+        return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
+}
+
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
         kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
 }
 
 static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-                         unsigned long data)
+                         struct kvm_memory_slot *slot, unsigned long data)
 {
         u64 *sptep;
         struct rmap_iterator uninitialized_var(iter);
@@ -1323,8 +1360,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
          * This has some overhead, but not as much as the cost of swapping
          * out actively used pages or breaking up actively used hugepages.
          */
-        if (!shadow_accessed_mask)
-                return kvm_unmap_rmapp(kvm, rmapp, data);
+        if (!shadow_accessed_mask) {
+                young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
+                goto out;
+        }
 
         for (sptep = rmap_get_first(*rmapp, &iter); sptep;
              sptep = rmap_get_next(&iter)) {
@@ -1336,12 +1375,14 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
                              (unsigned long *)sptep);
                 }
         }
-
+out:
+        /* @data has hva passed to kvm_age_hva(). */
+        trace_kvm_age_page(data, slot, young);
         return young;
 }
 
 static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-                              unsigned long data)
+                              struct kvm_memory_slot *slot, unsigned long data)
 {
         u64 *sptep;
         struct rmap_iterator iter;
@@ -1379,13 +1420,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 
         rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 
-        kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
+        kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0);
         kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
 int kvm_age_hva(struct kvm *kvm, unsigned long hva)
 {
-        return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
+        return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp);
 }
 
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
@@ -2472,14 +2513,12 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
         unsigned long hva;
 
         slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log);
-        if (!slot) {
-                get_page(fault_page);
-                return page_to_pfn(fault_page);
-        }
+        if (!slot)
+                return get_fault_pfn();
 
         hva = gfn_to_hva_memslot(slot, gfn);
 
-        return hva_to_pfn_atomic(vcpu->kvm, hva);
+        return hva_to_pfn_atomic(hva);
 }
 
 static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
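[Editor's note] On x86, kvm_handle_hva_range() above adds an inner loop over page-size levels: the clipped gfn range is turned into a slot-relative index range per level, and the handler walks slot->arch.rmap_pde[...] directly instead of chasing lpage_info. A rough standalone illustration of the gfn_to_index() arithmetic it relies on (simplified x86 constants; not kernel code):

#include <stdio.h>

#define PT_PAGE_TABLE_LEVEL     1
#define KVM_NR_PAGE_SIZES       3       /* 4K, 2M, 1G on x86 */

/* Each level covers 9 more address bits than the one below it. */
static unsigned long hpage_gfn_shift(int level)
{
        return (level - PT_PAGE_TABLE_LEVEL) * 9;
}

/* Same formula as gfn_to_index() in include/linux/kvm_host.h. */
static unsigned long gfn_to_index(unsigned long gfn, unsigned long base_gfn,
                                  int level)
{
        return (gfn >> hpage_gfn_shift(level)) -
               (base_gfn >> hpage_gfn_shift(level));
}

int main(void)
{
        /* Hypothetical slot and gfn range, as produced by the hva clipping. */
        unsigned long base_gfn = 0x1000, gfn_start = 0x1234, gfn_end = 0x1634;
        int j;

        for (j = PT_PAGE_TABLE_LEVEL;
             j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
                unsigned long idx = gfn_to_index(gfn_start, base_gfn, j);
                unsigned long idx_end = gfn_to_index(gfn_end - 1, base_gfn, j);

                printf("level %d: rmap slots %lu..%lu (%lu entries)\n",
                       j, idx, idx_end, idx_end - idx + 1);
        }
        return 0;
}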
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 9b7ec1150ab0..cfc258a6bf97 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -1,5 +1,5 @@
 /*
- * Kernel-based Virtual Machine -- Performane Monitoring Unit support
+ * Kernel-based Virtual Machine -- Performance Monitoring Unit support
  *
  * Copyright 2011 Red Hat, Inc. and/or its affiliates.
  *
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index baead950d6c8..687d0c30e559 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2063,7 +2063,7 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm)
         if (svm->nested.intercept & 1ULL) {
                 /*
                  * The #vmexit can't be emulated here directly because this
-                 * code path runs with irqs and preemtion disabled. A
+                 * code path runs with irqs and preemption disabled. A
                  * #vmexit emulation might sleep. Only signal request for
                  * the #vmexit here.
                  */
@@ -2409,7 +2409,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
 {
         /*
          * This function merges the msr permission bitmaps of kvm and the
-         * nested vmcb. It is omptimized in that it only merges the parts where
+         * nested vmcb. It is optimized in that it only merges the parts where
          * the kvm msr permission bitmap may contain zero bits
          */
         int i;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c39b60707e02..2300e5319ed9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1343,7 +1343,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
         guest_efer = vmx->vcpu.arch.efer;
 
         /*
-         * NX is emulated; LMA and LME handled by hardware; SCE meaninless
+         * NX is emulated; LMA and LME handled by hardware; SCE meaningless
          * outside long mode
          */
         ignore_bits = EFER_NX | EFER_SCE;
@@ -3261,7 +3261,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
          * qemu binaries.
          * IA32 arch specifies that at the time of processor reset the
          * "Accessed" bit in the AR field of segment registers is 1. And qemu
-         * is setting it to 0 in the usedland code. This causes invalid guest
+         * is setting it to 0 in the userland code. This causes invalid guest
          * state vmexit when "unrestricted guest" mode is turned on.
          * Fix for this setup issue in cpu_reset is being pushed in the qemu
          * tree. Newer qemu binaries with that qemu fix would not need this
@@ -4446,7 +4446,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
         hypercall[2] = 0xc1;
 }
 
-/* called to set cr0 as approriate for a mov-to-cr0 exit. */
+/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
 static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
 {
         if (to_vmx(vcpu)->nested.vmxon &&
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 59b59508ff07..3d9d08edbf29 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1093,7 +1093,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
          * For each generation, we track the original measured
          * nanosecond time, offset, and write, so if TSCs are in
          * sync, we can match exact offset, and if not, we can match
-         * exact software computaion in compute_guest_tsc()
+         * exact software computation in compute_guest_tsc()
          *
          * These values are tracked in kvm->arch.cur_xxx variables.
          */
@@ -1500,7 +1500,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 {
         gpa_t gpa = data & ~0x3f;
 
-        /* Bits 2:5 are resrved, Should be zero */
+        /* Bits 2:5 are reserved, Should be zero */
         if (data & 0x3c)
                 return 1;
 
@@ -1723,7 +1723,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                  * Ignore all writes to this no longer documented MSR.
                  * Writes are only relevant for old K7 processors,
                  * all pre-dating SVM, but a recommended workaround from
-                 * AMD for these chips. It is possible to speicify the
+                 * AMD for these chips. It is possible to specify the
                  * affected processor models on the command line, hence
                  * the need to ignore the workaround.
                  */
@@ -2632,7 +2632,6 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
         if (!vcpu->arch.time_page)
                 return -EINVAL;
         src->flags |= PVCLOCK_GUEST_STOPPED;
-        mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT);
         kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
         return 0;
 }
@@ -4492,7 +4491,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 
         /*
          * if emulation was due to access to shadowed page table
-         * and it failed try to unshadow page and re-entetr the
+         * and it failed try to unshadow page and re-enter the
          * guest to let CPU execute the instruction.
          */
         if (kvm_mmu_unprotect_page_virt(vcpu, gva))
@@ -5588,7 +5587,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
         /*
          * We are here if userspace calls get_regs() in the middle of
          * instruction emulation. Registers state needs to be copied
-         * back from emulation context to vcpu. Usrapace shouldn't do
+         * back from emulation context to vcpu. Userspace shouldn't do
          * that usually, but some bad designed PV devices (vmware
          * backdoor interface) need this to work
          */
@@ -6117,7 +6116,7 @@ int kvm_arch_hardware_enable(void *garbage)
          * as we reset last_host_tsc on all VCPUs to stop this from being
          * called multiple times (one for each physical CPU bringup).
          *
-         * Platforms with unnreliable TSCs don't have to deal with this, they
+         * Platforms with unreliable TSCs don't have to deal with this, they
          * will be compensated by the logic in vcpu_load, which sets the TSC to
          * catchup mode. This will catchup all VCPUs to real time, but cannot
          * guarantee that they stay in perfect synchronization.
@@ -6314,6 +6313,10 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
         int i;
 
         for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+                if (!dont || free->arch.rmap_pde[i] != dont->arch.rmap_pde[i]) {
+                        kvm_kvfree(free->arch.rmap_pde[i]);
+                        free->arch.rmap_pde[i] = NULL;
+                }
                 if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
                         kvm_kvfree(free->arch.lpage_info[i]);
                         free->arch.lpage_info[i] = NULL;
@@ -6333,6 +6336,11 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
                 lpages = gfn_to_index(slot->base_gfn + npages - 1,
                                       slot->base_gfn, level) + 1;
 
+                slot->arch.rmap_pde[i] =
+                        kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap_pde[i]));
+                if (!slot->arch.rmap_pde[i])
+                        goto out_free;
+
                 slot->arch.lpage_info[i] =
                         kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
                 if (!slot->arch.lpage_info[i])
@@ -6361,7 +6369,9 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 
 out_free:
         for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+                kvm_kvfree(slot->arch.rmap_pde[i]);
                 kvm_kvfree(slot->arch.lpage_info[i]);
+                slot->arch.rmap_pde[i] = NULL;
                 slot->arch.lpage_info[i] = NULL;
         }
         return -ENOMEM;
@@ -6381,7 +6391,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                 map_flags = MAP_SHARED | MAP_ANONYMOUS;
 
         /*To keep backward compatibility with older userspace,
-         *x86 needs to hanlde !user_alloc case.
+         *x86 needs to handle !user_alloc case.
          */
         if (!user_alloc) {
                 if (npages && !old.rmap) {
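[Editor's note] kvm_arch_create_memslot() above sizes each new rmap_pde array with the same gfn_to_index() formula, where the trailing +1 guarantees room when a slot does not start or end on a huge-page boundary. A toy computation of those per-level sizes (hypothetical slot geometry, plain userspace C):

#include <stdio.h>

#define PT_PAGE_TABLE_LEVEL     1
#define KVM_NR_PAGE_SIZES       3

/* Same formula as gfn_to_index(); 9 address bits per level on x86. */
static unsigned long gfn_to_index(unsigned long gfn, unsigned long base_gfn,
                                  int level)
{
        unsigned long shift = (level - PT_PAGE_TABLE_LEVEL) * 9;

        return (gfn >> shift) - (base_gfn >> shift);
}

int main(void)
{
        /* A slot of 1000 pages starting at an unaligned gfn. */
        unsigned long base_gfn = 0x1003, npages = 1000;
        int i;

        for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
                int level = PT_PAGE_TABLE_LEVEL + i + 1;  /* 2M, then 1G */
                unsigned long lpages =
                        gfn_to_index(base_gfn + npages - 1, base_gfn, level) + 1;

                printf("level %d: %lu rmap_pde entries\n", level, lpages);
        }
        /* prints 2 entries at the 2M level, 1 at the 1G level */
        return 0;
}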
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b70b48b01098..1993eb1cb2cd 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -183,6 +183,18 @@ struct kvm_vcpu {
         } async_pf;
 #endif
 
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
+        /*
+         * Cpu relax intercept or pause loop exit optimization
+         * in_spin_loop: set when a vcpu does a pause loop exit
+         *  or cpu relax intercepted.
+         * dy_eligible: indicates whether vcpu is eligible for directed yield.
+         */
+        struct {
+                bool in_spin_loop;
+                bool dy_eligible;
+        } spin_loop;
+#endif
         struct kvm_vcpu_arch arch;
 };
 
@@ -378,20 +390,11 @@ id_to_memslot(struct kvm_memslots *slots, int id)
         return slot;
 }
 
-#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
-#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
-static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
-
 extern struct page *bad_page;
-extern struct page *fault_page;
-
-extern pfn_t bad_pfn;
-extern pfn_t fault_pfn;
 
 int is_error_page(struct page *page);
 int is_error_pfn(pfn_t pfn);
 int is_hwpoison_pfn(pfn_t pfn);
-int is_fault_pfn(pfn_t pfn);
 int is_noslot_pfn(pfn_t pfn);
 int is_invalid_pfn(pfn_t pfn);
 int kvm_is_error_hva(unsigned long addr);
@@ -427,20 +430,20 @@ void kvm_release_page_dirty(struct page *page);
 void kvm_set_page_dirty(struct page *page);
 void kvm_set_page_accessed(struct page *page);
 
-pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
+pfn_t hva_to_pfn_atomic(unsigned long addr);
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
                        bool write_fault, bool *writable);
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
                       bool *writable);
-pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
-                         struct kvm_memory_slot *slot, gfn_t gfn);
+pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_release_pfn_dirty(pfn_t);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
 void kvm_set_pfn_accessed(pfn_t pfn);
 void kvm_get_pfn(pfn_t pfn);
+pfn_t get_fault_pfn(void);
 
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
                         int len);
@@ -740,6 +743,14 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
                 (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
 }
 
+static inline gfn_t
+hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
+{
+        gfn_t gfn_offset = (hva - slot->userspace_addr) >> PAGE_SHIFT;
+
+        return slot->base_gfn + gfn_offset;
+}
+
 static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
                                                gfn_t gfn)
 {
@@ -899,5 +910,32 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
         }
 }
 
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
+
+static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
+{
+        vcpu->spin_loop.in_spin_loop = val;
+}
+static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
+{
+        vcpu->spin_loop.dy_eligible = val;
+}
+
+#else /* !CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
+
+static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
+{
+}
+
+static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
+{
+}
+
+static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
+{
+        return true;
+}
+
+#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
 #endif
 
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 28694f4a9139..d01b24b72c61 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -21,3 +21,6 @@ config KVM_ASYNC_PF
 
 config HAVE_KVM_MSI
         bool
+
+config HAVE_KVM_CPU_RELAX_INTERCEPT
+        bool
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index e9fff9830bf0..c03f1fb26701 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -42,13 +42,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
 static void kvm_iommu_put_pages(struct kvm *kvm,
                                 gfn_t base_gfn, unsigned long npages);
 
-static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
-                           gfn_t gfn, unsigned long size)
+static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
+                           unsigned long size)
 {
         gfn_t end_gfn;
         pfn_t pfn;
 
-        pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
+        pfn = gfn_to_pfn_memslot(slot, gfn);
         end_gfn = gfn + (size >> PAGE_SHIFT);
         gfn += 1;
 
@@ -56,7 +56,7 @@ static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
                 return pfn;
 
         while (gfn < end_gfn)
-                gfn_to_pfn_memslot(kvm, slot, gfn++);
+                gfn_to_pfn_memslot(slot, gfn++);
 
         return pfn;
 }
@@ -105,7 +105,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
                  * Pin all pages we are about to map in memory. This is
                  * important because we unmap and unpin in 4kb steps later.
                  */
-                pfn = kvm_pin_pages(kvm, slot, gfn, page_size);
+                pfn = kvm_pin_pages(slot, gfn, page_size);
                 if (is_error_pfn(pfn)) {
                         gfn += 1;
                         continue;
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 83402d74a767..7118be0f2f2c 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -321,11 +321,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
                 switch (ue->u.irqchip.irqchip) {
                 case KVM_IRQCHIP_PIC_MASTER:
                         e->set = kvm_set_pic_irq;
-                        max_pin = 16;
+                        max_pin = PIC_NUM_PINS;
                         break;
                 case KVM_IRQCHIP_PIC_SLAVE:
                         e->set = kvm_set_pic_irq;
-                        max_pin = 16;
+                        max_pin = PIC_NUM_PINS;
                         delta = 8;
                         break;
                 case KVM_IRQCHIP_IOAPIC:
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 246852397e30..0014ee99dc7f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -100,11 +100,14 @@ EXPORT_SYMBOL_GPL(kvm_rebooting);
 
 static bool largepages_enabled = true;
 
+struct page *bad_page;
+static pfn_t bad_pfn;
+
 static struct page *hwpoison_page;
 static pfn_t hwpoison_pfn;
 
-struct page *fault_page;
-pfn_t fault_pfn;
+static struct page *fault_page;
+static pfn_t fault_pfn;
 
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
@@ -236,6 +239,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
         }
         vcpu->run = page_address(page);
 
+        kvm_vcpu_set_in_spin_loop(vcpu, false);
+        kvm_vcpu_set_dy_eligible(vcpu, false);
+
         r = kvm_arch_vcpu_init(vcpu);
         if (r < 0)
                 goto fail_free_run;
@@ -332,8 +338,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
          * count is also read inside the mmu_lock critical section.
          */
         kvm->mmu_notifier_count++;
-        for (; start < end; start += PAGE_SIZE)
-                need_tlb_flush |= kvm_unmap_hva(kvm, start);
+        need_tlb_flush = kvm_unmap_hva_range(kvm, start, end);
         need_tlb_flush |= kvm->tlbs_dirty;
         /* we've to flush the tlb before the pages can be freed */
         if (need_tlb_flush)
@@ -950,12 +955,6 @@ int is_hwpoison_pfn(pfn_t pfn)
 }
 EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
 
-int is_fault_pfn(pfn_t pfn)
-{
-        return pfn == fault_pfn;
-}
-EXPORT_SYMBOL_GPL(is_fault_pfn);
-
 int is_noslot_pfn(pfn_t pfn)
 {
         return pfn == bad_pfn;
@@ -1039,11 +1038,12 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-static pfn_t get_fault_pfn(void)
+pfn_t get_fault_pfn(void)
 {
         get_page(fault_page);
         return fault_pfn;
 }
+EXPORT_SYMBOL_GPL(get_fault_pfn);
 
 int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
                          unsigned long start, int write, struct page **page)
@@ -1065,8 +1065,8 @@ static inline int check_user_page_hwpoison(unsigned long addr)
         return rc == -EHWPOISON;
 }
 
-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
-                        bool *async, bool write_fault, bool *writable)
+static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
+                        bool write_fault, bool *writable)
 {
         struct page *page[1];
         int npages = 0;
@@ -1146,9 +1146,9 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
         return pfn;
 }
 
-pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
+pfn_t hva_to_pfn_atomic(unsigned long addr)
 {
-        return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
+        return hva_to_pfn(addr, true, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
 
@@ -1166,7 +1166,7 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
                 return page_to_pfn(bad_page);
         }
 
-        return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
+        return hva_to_pfn(addr, atomic, async, write_fault, writable);
 }
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
@@ -1195,11 +1195,10 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
 
-pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
-                         struct kvm_memory_slot *slot, gfn_t gfn)
+pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
 {
         unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-        return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
+        return hva_to_pfn(addr, false, NULL, true, NULL);
 }
 
 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
@@ -1580,6 +1579,43 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
 
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
+/*
+ * Helper that checks whether a VCPU is eligible for directed yield.
+ * Most eligible candidate to yield is decided by following heuristics:
+ *
+ * (a) VCPU which has not done pl-exit or cpu relax intercepted recently
+ *  (preempted lock holder), indicated by @in_spin_loop.
+ *  Set at the beiginning and cleared at the end of interception/PLE handler.
+ *
+ * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
+ *  chance last time (mostly it has become eligible now since we have probably
+ *  yielded to lockholder in last iteration. This is done by toggling
+ *  @dy_eligible each time a VCPU checked for eligibility.)
+ *
+ * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding
+ * to preempted lock-holder could result in wrong VCPU selection and CPU
+ * burning. Giving priority for a potential lock-holder increases lock
+ * progress.
+ *
+ * Since algorithm is based on heuristics, accessing another VCPU data without
+ * locking does not harm. It may result in trying to yield to same VCPU, fail
+ * and continue with next VCPU and so on.
+ */
+bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
+{
+        bool eligible;
+
+        eligible = !vcpu->spin_loop.in_spin_loop ||
+                        (vcpu->spin_loop.in_spin_loop &&
+                         vcpu->spin_loop.dy_eligible);
+
+        if (vcpu->spin_loop.in_spin_loop)
+                kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
+
+        return eligible;
+}
+#endif
 void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 {
         struct kvm *kvm = me->kvm;
@@ -1589,6 +1625,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
         int pass;
         int i;
 
+        kvm_vcpu_set_in_spin_loop(me, true);
         /*
          * We boost the priority of a VCPU that is runnable but not
          * currently running, because it got preempted by something
@@ -1607,6 +1644,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
                                 continue;
                         if (waitqueue_active(&vcpu->wq))
                                 continue;
+                        if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
+                                continue;
                         if (kvm_vcpu_yield_to(vcpu)) {
                                 kvm->last_boosted_vcpu = i;
                                 yielded = 1;
@@ -1614,6 +1653,10 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
                         }
                 }
         }
+        kvm_vcpu_set_in_spin_loop(me, false);
+
+        /* Ensure vcpu is not eligible during next spinloop */
+        kvm_vcpu_set_dy_eligible(me, false);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
 
@@ -2697,9 +2740,6 @@ static struct syscore_ops kvm_syscore_ops = {
         .resume = kvm_resume,
 };
 
-struct page *bad_page;
-pfn_t bad_pfn;
-
 static inline
 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
 {
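
[Editor's note] The directed-yield heuristic added above marks a vcpu in_spin_loop for the duration of the PLE/cpu-relax handler and flips dy_eligible on every eligibility check, so a candidate that itself just exited a pause loop is skipped only on alternate scans. A toy trace of that toggle (mock struct, not the kernel's kvm_vcpu):

#include <stdio.h>
#include <stdbool.h>

/* Mock of the spin_loop state added to struct kvm_vcpu. */
struct vcpu {
        bool in_spin_loop;
        bool dy_eligible;
};

/* Mirrors the logic of kvm_vcpu_eligible_for_directed_yield(). */
static bool eligible_for_directed_yield(struct vcpu *v)
{
        bool eligible = !v->in_spin_loop ||
                        (v->in_spin_loop && v->dy_eligible);

        if (v->in_spin_loop)
                v->dy_eligible = !v->dy_eligible;  /* toggle for next check */

        return eligible;
}

int main(void)
{
        /* A vcpu that recently pause-loop-exited: in_spin_loop is set. */
        struct vcpu v = { .in_spin_loop = true, .dy_eligible = false };

        for (int scan = 1; scan <= 4; ++scan)
                printf("scan %d: %s\n", scan,
                       eligible_for_directed_yield(&v) ? "yield to it"
                                                       : "skip it");
        /* Alternates: skip, yield, skip, yield. */
        return 0;
}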