author		Avi Kivity <avi@redhat.com>	2012-07-26 04:54:21 -0400
committer	Avi Kivity <avi@redhat.com>	2012-07-26 04:54:21 -0400
commit		e9bda6f6f902e6b55d9baceb5523468a048cbe56 (patch)
tree		bf09cc165da1197cd34967da0593d08b9a37c0f3 /virt/kvm
parent		bdc0077af574800d24318b6945cf2344e8dbb050 (diff)
parent		06e48c510aa37f6e791602e6420422ea7071fe94 (diff)
Merge branch 'queue' into next
Merge patches queued during the run-up to the merge window.
* queue: (25 commits)
KVM: Choose better candidate for directed yield
KVM: Note down when cpu relax intercepted or pause loop exited
KVM: Add config to support ple or cpu relax optimzation
KVM: switch to symbolic name for irq_states size
KVM: x86: Fix typos in pmu.c
KVM: x86: Fix typos in lapic.c
KVM: x86: Fix typos in cpuid.c
KVM: x86: Fix typos in emulate.c
KVM: x86: Fix typos in x86.c
KVM: SVM: Fix typos
KVM: VMX: Fix typos
KVM: remove the unused parameter of gfn_to_pfn_memslot
KVM: remove is_error_hpa
KVM: make bad_pfn static to kvm_main.c
KVM: using get_fault_pfn to get the fault pfn
KVM: MMU: track the refcount when unmap the page
KVM: x86: remove unnecessary mark_page_dirty
KVM: MMU: Avoid handling same rmap_pde in kvm_handle_hva_range()
KVM: MMU: Push trace_kvm_age_page() into kvm_age_rmapp()
KVM: MMU: Add memslot parameter to hva handlers
...
Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'virt/kvm')
-rw-r--r--	virt/kvm/Kconfig	3
-rw-r--r--	virt/kvm/iommu.c	10
-rw-r--r--	virt/kvm/irq_comm.c	4
-rw-r--r--	virt/kvm/kvm_main.c	84
4 files changed, 72 insertions, 29 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 28694f4a9139..d01b24b72c61 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -21,3 +21,6 @@ config KVM_ASYNC_PF
 
 config HAVE_KVM_MSI
 	bool
+
+config HAVE_KVM_CPU_RELAX_INTERCEPT
+	bool
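Note that the new symbol is a bare capability flag: it has no prompt and no effect until an architecture selects it. A minimal sketch of the opt-in, assuming an arch whose KVM implements pause-loop/cpu-relax exits (the real x86 hunk lives outside virt/kvm and is not shown in this diffstat):

	# arch/<arch>/kvm/Kconfig -- illustrative sketch, not part of this diff
	config KVM
		tristate "Kernel-based Virtual Machine (KVM) support"
		select HAVE_KVM_CPU_RELAX_INTERCEPT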
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index e9fff9830bf0..c03f1fb26701 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -42,13 +42,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
 static void kvm_iommu_put_pages(struct kvm *kvm,
 				gfn_t base_gfn, unsigned long npages);
 
-static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
-			   gfn_t gfn, unsigned long size)
+static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
+			   unsigned long size)
 {
 	gfn_t end_gfn;
 	pfn_t pfn;
 
-	pfn     = gfn_to_pfn_memslot(kvm, slot, gfn);
+	pfn     = gfn_to_pfn_memslot(slot, gfn);
 	end_gfn = gfn + (size >> PAGE_SHIFT);
 	gfn    += 1;
 
@@ -56,7 +56,7 @@ static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
 		return pfn;
 
 	while (gfn < end_gfn)
-		gfn_to_pfn_memslot(kvm, slot, gfn++);
+		gfn_to_pfn_memslot(slot, gfn++);
 
 	return pfn;
 }
@@ -105,7 +105,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 	 * Pin all pages we are about to map in memory. This is
 	 * important because we unmap and unpin in 4kb steps later.
 	 */
-	pfn = kvm_pin_pages(kvm, slot, gfn, page_size);
+	pfn = kvm_pin_pages(slot, gfn, page_size);
 	if (is_error_pfn(pfn)) {
 		gfn += 1;
 		continue;
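The dropped struct kvm argument reflects that gfn_to_pfn_memslot() and kvm_pin_pages() work entirely from the memslot: the gfn-to-hva translation and the page pinning never touch the VM structure. A sketch of the resulting calling convention (an illustrative caller, not a hunk from this series):

	/* Illustrative only: pin one page given just a memslot and a gfn. */
	pfn_t pfn = gfn_to_pfn_memslot(slot, gfn);
	if (is_error_pfn(pfn))
		return -EFAULT;	/* hypothetical caller's error path */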
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 83402d74a767..7118be0f2f2c 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -321,11 +321,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 	switch (ue->u.irqchip.irqchip) {
 	case KVM_IRQCHIP_PIC_MASTER:
 		e->set = kvm_set_pic_irq;
-		max_pin = 16;
+		max_pin = PIC_NUM_PINS;
 		break;
 	case KVM_IRQCHIP_PIC_SLAVE:
 		e->set = kvm_set_pic_irq;
-		max_pin = 16;
+		max_pin = PIC_NUM_PINS;
 		delta = 8;
 		break;
 	case KVM_IRQCHIP_IOAPIC:
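PIC_NUM_PINS replaces the magic number 16 for the i8259 pin count; the macro itself is defined in the x86 interrupt header, which is outside this diffstat. For reference, the definition amounts to the following (indicative, not a hunk from this diff):

	/* arch/x86/kvm/irq.h -- shown for reference, not part of this diff */
	#define PIC_NUM_PINS 16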
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 246852397e30..0014ee99dc7f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -100,11 +100,14 @@ EXPORT_SYMBOL_GPL(kvm_rebooting);
 
 static bool largepages_enabled = true;
 
+struct page *bad_page;
+static pfn_t bad_pfn;
+
 static struct page *hwpoison_page;
 static pfn_t hwpoison_pfn;
 
-struct page *fault_page;
-pfn_t fault_pfn;
+static struct page *fault_page;
+static pfn_t fault_pfn;
 
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
@@ -236,6 +239,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	}
 	vcpu->run = page_address(page);
 
+	kvm_vcpu_set_in_spin_loop(vcpu, false);
+	kvm_vcpu_set_dy_eligible(vcpu, false);
+
 	r = kvm_arch_vcpu_init(vcpu);
 	if (r < 0)
 		goto fail_free_run;
@@ -332,8 +338,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	 * count is also read inside the mmu_lock critical section.
 	 */
 	kvm->mmu_notifier_count++;
-	for (; start < end; start += PAGE_SIZE)
-		need_tlb_flush |= kvm_unmap_hva(kvm, start);
+	need_tlb_flush = kvm_unmap_hva_range(kvm, start, end);
 	need_tlb_flush |= kvm->tlbs_dirty;
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
@@ -950,12 +955,6 @@ int is_hwpoison_pfn(pfn_t pfn)
 }
 EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
 
-int is_fault_pfn(pfn_t pfn)
-{
-	return pfn == fault_pfn;
-}
-EXPORT_SYMBOL_GPL(is_fault_pfn);
-
 int is_noslot_pfn(pfn_t pfn)
 {
 	return pfn == bad_pfn;
@@ -1039,11 +1038,12 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-static pfn_t get_fault_pfn(void)
+pfn_t get_fault_pfn(void)
 {
 	get_page(fault_page);
 	return fault_pfn;
 }
+EXPORT_SYMBOL_GPL(get_fault_pfn);
 
 int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
 	unsigned long start, int write, struct page **page)
@@ -1065,8 +1065,8 @@ static inline int check_user_page_hwpoison(unsigned long addr)
 	return rc == -EHWPOISON;
 }
 
-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
-			bool *async, bool write_fault, bool *writable)
+static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
+			bool write_fault, bool *writable)
 {
 	struct page *page[1];
 	int npages = 0;
@@ -1146,9 +1146,9 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
 	return pfn;
 }
 
-pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
+pfn_t hva_to_pfn_atomic(unsigned long addr)
 {
-	return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
+	return hva_to_pfn(addr, true, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
 
@@ -1166,7 +1166,7 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
 		return page_to_pfn(bad_page);
 	}
 
-	return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
+	return hva_to_pfn(addr, atomic, async, write_fault, writable);
 }
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
@@ -1195,11 +1195,10 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
 
-pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
-			 struct kvm_memory_slot *slot, gfn_t gfn)
+pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
 {
 	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-	return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
+	return hva_to_pfn(addr, false, NULL, true, NULL);
 }
 
 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
@@ -1580,6 +1579,43 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
 
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
+/*
+ * Helper that checks whether a VCPU is eligible for directed yield.
+ * Most eligible candidate to yield is decided by following heuristics:
+ *
+ * (a) VCPU which has not done pl-exit or cpu relax intercepted recently
+ * (preempted lock holder), indicated by @in_spin_loop.
+ * Set at the beginning and cleared at the end of interception/PLE handler.
+ *
+ * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
+ * chance last time (mostly it has become eligible now since we have probably
+ * yielded to lockholder in last iteration. This is done by toggling
+ * @dy_eligible each time a VCPU checked for eligibility.)
+ *
+ * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding
+ * to preempted lock-holder could result in wrong VCPU selection and CPU
+ * burning. Giving priority for a potential lock-holder increases lock
+ * progress.
+ *
+ * Since algorithm is based on heuristics, accessing another VCPU data without
+ * locking does not harm. It may result in trying to yield to same VCPU, fail
+ * and continue with next VCPU and so on.
+ */
+bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
+{
+	bool eligible;
+
+	eligible = !vcpu->spin_loop.in_spin_loop ||
+			(vcpu->spin_loop.in_spin_loop &&
+			 vcpu->spin_loop.dy_eligible);
+
+	if (vcpu->spin_loop.in_spin_loop)
+		kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
+
+	return eligible;
+}
+#endif
 void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 {
 	struct kvm *kvm = me->kvm;
@@ -1589,6 +1625,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 	int pass;
 	int i;
 
+	kvm_vcpu_set_in_spin_loop(me, true);
 	/*
 	 * We boost the priority of a VCPU that is runnable but not
 	 * currently running, because it got preempted by something
@@ -1607,6 +1644,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 				continue;
 			if (waitqueue_active(&vcpu->wq))
 				continue;
+			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
+				continue;
 			if (kvm_vcpu_yield_to(vcpu)) {
 				kvm->last_boosted_vcpu = i;
 				yielded = 1;
@@ -1614,6 +1653,10 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 			}
 		}
 	}
+	kvm_vcpu_set_in_spin_loop(me, false);
+
+	/* Ensure vcpu is not eligible during next spinloop */
+	kvm_vcpu_set_dy_eligible(me, false);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
 
@@ -2697,9 +2740,6 @@ static struct syscore_ops kvm_syscore_ops = {
 	.resume = kvm_resume,
 };
 
-struct page *bad_page;
-pfn_t bad_pfn;
-
 static inline
 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
 {
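To make the directed-yield heuristic above concrete, here is a standalone user-space model of kvm_vcpu_eligible_for_directed_yield() (a simplified sketch, not kernel code): a vCPU outside its spin loop is always a candidate, while a spinning vCPU alternates between eligible and ineligible, so a pause-loop-exiting waiter is skipped at most every other check and a preempted lock holder keeps priority.

	#include <stdbool.h>
	#include <stdio.h>

	/* Mirrors the spin_loop fields this series adds to struct kvm_vcpu. */
	struct spin_loop {
		bool in_spin_loop;	/* set while in the PLE/cpu-relax handler */
		bool dy_eligible;	/* toggled on every eligibility check */
	};

	static bool eligible_for_directed_yield(struct spin_loop *s)
	{
		/* !in_spin_loop || (in_spin_loop && dy_eligible), simplified */
		bool eligible = !s->in_spin_loop || s->dy_eligible;

		if (s->in_spin_loop)
			s->dy_eligible = !s->dy_eligible;

		return eligible;
	}

	int main(void)
	{
		struct spin_loop spinner = { .in_spin_loop = true };
		int i;

		/* Prints "0 1 0 1": a spinning vCPU is eligible every other pass. */
		for (i = 0; i < 4; i++)
			printf("%d ", eligible_for_directed_yield(&spinner));
		printf("\n");
		return 0;
	}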