path: root/virt/kvm
author     Avi Kivity <avi@redhat.com>   2012-07-26 04:54:21 -0400
committer  Avi Kivity <avi@redhat.com>   2012-07-26 04:54:21 -0400
commit     e9bda6f6f902e6b55d9baceb5523468a048cbe56 (patch)
tree       bf09cc165da1197cd34967da0593d08b9a37c0f3 /virt/kvm
parent     bdc0077af574800d24318b6945cf2344e8dbb050 (diff)
parent     06e48c510aa37f6e791602e6420422ea7071fe94 (diff)
Merge branch 'queue' into next
Merge patches queued during the run-up to the merge window.

* queue: (25 commits)
  KVM: Choose better candidate for directed yield
  KVM: Note down when cpu relax intercepted or pause loop exited
  KVM: Add config to support ple or cpu relax optimzation
  KVM: switch to symbolic name for irq_states size
  KVM: x86: Fix typos in pmu.c
  KVM: x86: Fix typos in lapic.c
  KVM: x86: Fix typos in cpuid.c
  KVM: x86: Fix typos in emulate.c
  KVM: x86: Fix typos in x86.c
  KVM: SVM: Fix typos
  KVM: VMX: Fix typos
  KVM: remove the unused parameter of gfn_to_pfn_memslot
  KVM: remove is_error_hpa
  KVM: make bad_pfn static to kvm_main.c
  KVM: using get_fault_pfn to get the fault pfn
  KVM: MMU: track the refcount when unmap the page
  KVM: x86: remove unnecessary mark_page_dirty
  KVM: MMU: Avoid handling same rmap_pde in kvm_handle_hva_range()
  KVM: MMU: Push trace_kvm_age_page() into kvm_age_rmapp()
  KVM: MMU: Add memslot parameter to hva handlers
  ...

Signed-off-by: Avi Kivity <avi@redhat.com>
Diffstat (limited to 'virt/kvm')
-rw-r--r--  virt/kvm/Kconfig    |  3
-rw-r--r--  virt/kvm/iommu.c    | 10
-rw-r--r--  virt/kvm/irq_comm.c |  4
-rw-r--r--  virt/kvm/kvm_main.c | 84
4 files changed, 72 insertions(+), 29 deletions(-)
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 28694f4a9139..d01b24b72c61 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -21,3 +21,6 @@ config KVM_ASYNC_PF
 
 config HAVE_KVM_MSI
 	bool
+
+config HAVE_KVM_CPU_RELAX_INTERCEPT
+	bool
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index e9fff9830bf0..c03f1fb26701 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -42,13 +42,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
 static void kvm_iommu_put_pages(struct kvm *kvm,
 				gfn_t base_gfn, unsigned long npages);
 
-static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
-			   gfn_t gfn, unsigned long size)
+static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
+			   unsigned long size)
 {
 	gfn_t end_gfn;
 	pfn_t pfn;
 
-	pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
+	pfn = gfn_to_pfn_memslot(slot, gfn);
 	end_gfn = gfn + (size >> PAGE_SHIFT);
 	gfn += 1;
 
@@ -56,7 +56,7 @@ static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
 		return pfn;
 
 	while (gfn < end_gfn)
-		gfn_to_pfn_memslot(kvm, slot, gfn++);
+		gfn_to_pfn_memslot(slot, gfn++);
 
 	return pfn;
 }
@@ -105,7 +105,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 	 * Pin all pages we are about to map in memory. This is
 	 * important because we unmap and unpin in 4kb steps later.
 	 */
-	pfn = kvm_pin_pages(kvm, slot, gfn, page_size);
+	pfn = kvm_pin_pages(slot, gfn, page_size);
 	if (is_error_pfn(pfn)) {
 		gfn += 1;
 		continue;
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 83402d74a767..7118be0f2f2c 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -321,11 +321,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 	switch (ue->u.irqchip.irqchip) {
 	case KVM_IRQCHIP_PIC_MASTER:
 		e->set = kvm_set_pic_irq;
-		max_pin = 16;
+		max_pin = PIC_NUM_PINS;
 		break;
 	case KVM_IRQCHIP_PIC_SLAVE:
 		e->set = kvm_set_pic_irq;
-		max_pin = 16;
+		max_pin = PIC_NUM_PINS;
 		delta = 8;
 		break;
 	case KVM_IRQCHIP_IOAPIC:
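
The two literal 16s above become PIC_NUM_PINS ("KVM: switch to symbolic name for irq_states size"). The constant itself is defined outside virt/kvm; a minimal sketch of the assumed definition, shown only to make the rename concrete:

    /* Assumed definition (on x86, arch/x86/kvm/irq.h): the two cascaded
     * 8259 PICs expose 2 * 8 = 16 pins, so the value is unchanged. */
    #define PIC_NUM_PINS	16
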
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 246852397e30..0014ee99dc7f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -100,11 +100,14 @@ EXPORT_SYMBOL_GPL(kvm_rebooting);
 
 static bool largepages_enabled = true;
 
+struct page *bad_page;
+static pfn_t bad_pfn;
+
 static struct page *hwpoison_page;
 static pfn_t hwpoison_pfn;
 
-struct page *fault_page;
-pfn_t fault_pfn;
+static struct page *fault_page;
+static pfn_t fault_pfn;
 
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
@@ -236,6 +239,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	}
 	vcpu->run = page_address(page);
 
+	kvm_vcpu_set_in_spin_loop(vcpu, false);
+	kvm_vcpu_set_dy_eligible(vcpu, false);
+
 	r = kvm_arch_vcpu_init(vcpu);
 	if (r < 0)
 		goto fail_free_run;
@@ -332,8 +338,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	 * count is also read inside the mmu_lock critical section.
 	 */
 	kvm->mmu_notifier_count++;
-	for (; start < end; start += PAGE_SIZE)
-		need_tlb_flush |= kvm_unmap_hva(kvm, start);
+	need_tlb_flush = kvm_unmap_hva_range(kvm, start, end);
 	need_tlb_flush |= kvm->tlbs_dirty;
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
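
The MMU-notifier invalidation path above now hands the whole [start, end) range to the architecture in one kvm_unmap_hva_range() call instead of looping page by page, which lets the arch side walk each memslot's rmap range once ("KVM: MMU: Avoid handling same rmap_pde in kvm_handle_hva_range()"). A minimal sketch of the assumed per-arch prototype this relies on (declared outside this virt/kvm diff):

    /* Assumed arch-provided prototype; the return value indicates whether
     * TLBs must be flushed for the invalidated range. */
    int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
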
@@ -950,12 +955,6 @@ int is_hwpoison_pfn(pfn_t pfn)
 }
 EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
 
-int is_fault_pfn(pfn_t pfn)
-{
-	return pfn == fault_pfn;
-}
-EXPORT_SYMBOL_GPL(is_fault_pfn);
-
 int is_noslot_pfn(pfn_t pfn)
 {
 	return pfn == bad_pfn;
@@ -1039,11 +1038,12 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-static pfn_t get_fault_pfn(void)
+pfn_t get_fault_pfn(void)
 {
 	get_page(fault_page);
 	return fault_pfn;
 }
+EXPORT_SYMBOL_GPL(get_fault_pfn);
 
 int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
 			unsigned long start, int write, struct page **page)
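
get_fault_pfn() loses its static qualifier and gains an export ("KVM: using get_fault_pfn to get the fault pfn") so that code outside kvm_main.c can return the shared fault pfn instead of keeping its own copy of fault_pfn. A hedged sketch of a hypothetical caller, assuming the matching declaration is added to include/linux/kvm_host.h:

    /* Assumed declaration, added outside this diff: */
    pfn_t get_fault_pfn(void);

    /* Hypothetical caller: return the shared fault pfn when a guest
     * frame cannot be translated, instead of a locally defined one. */
    static pfn_t example_translate(bool translation_failed, pfn_t pfn)
    {
    	return translation_failed ? get_fault_pfn() : pfn;
    }
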
@@ -1065,8 +1065,8 @@ static inline int check_user_page_hwpoison(unsigned long addr)
 	return rc == -EHWPOISON;
 }
 
-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
-			bool *async, bool write_fault, bool *writable)
+static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
+			bool write_fault, bool *writable)
 {
 	struct page *page[1];
 	int npages = 0;
@@ -1146,9 +1146,9 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
 	return pfn;
 }
 
-pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
+pfn_t hva_to_pfn_atomic(unsigned long addr)
 {
-	return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
+	return hva_to_pfn(addr, true, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
 
@@ -1166,7 +1166,7 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
 		return page_to_pfn(bad_page);
 	}
 
-	return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
+	return hva_to_pfn(addr, atomic, async, write_fault, writable);
 }
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
@@ -1195,11 +1195,10 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
 
-pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
-			 struct kvm_memory_slot *slot, gfn_t gfn)
+pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
 {
 	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-	return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
+	return hva_to_pfn(addr, false, NULL, true, NULL);
 }
 
 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
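
Because hva_to_pfn() no longer needs struct kvm, gfn_to_pfn_memslot() drops its kvm argument ("KVM: remove the unused parameter of gfn_to_pfn_memslot"); the iommu.c hunks earlier in this diff adapt to the same change. A before/after sketch of a caller that already holds a memslot (variable names are illustrative):

    /* Before this series: the kvm pointer was threaded through but unused. */
    pfn = gfn_to_pfn_memslot(kvm, slot, gfn);

    /* After: only the slot and the gfn are needed. */
    pfn = gfn_to_pfn_memslot(slot, gfn);
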
@@ -1580,6 +1579,43 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
 
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
+/*
+ * Helper that checks whether a VCPU is eligible for directed yield.
+ * Most eligible candidate to yield is decided by following heuristics:
+ *
+ * (a) VCPU which has not done pl-exit or cpu relax intercepted recently
+ * (preempted lock holder), indicated by @in_spin_loop.
+ * Set at the beiginning and cleared at the end of interception/PLE handler.
+ *
+ * (b) VCPU which has done pl-exit/ cpu relax intercepted but did not get
+ * chance last time (mostly it has become eligible now since we have probably
+ * yielded to lockholder in last iteration. This is done by toggling
+ * @dy_eligible each time a VCPU checked for eligibility.)
+ *
+ * Yielding to a recently pl-exited/cpu relax intercepted VCPU before yielding
+ * to preempted lock-holder could result in wrong VCPU selection and CPU
+ * burning. Giving priority for a potential lock-holder increases lock
+ * progress.
+ *
+ * Since algorithm is based on heuristics, accessing another VCPU data without
+ * locking does not harm. It may result in trying to yield to same VCPU, fail
+ * and continue with next VCPU and so on.
+ */
+bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
+{
+	bool eligible;
+
+	eligible = !vcpu->spin_loop.in_spin_loop ||
+			(vcpu->spin_loop.in_spin_loop &&
+			 vcpu->spin_loop.dy_eligible);
+
+	if (vcpu->spin_loop.in_spin_loop)
+		kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
+
+	return eligible;
+}
+#endif
 void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 {
 	struct kvm *kvm = me->kvm;
@@ -1589,6 +1625,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 	int pass;
 	int i;
 
+	kvm_vcpu_set_in_spin_loop(me, true);
 	/*
 	 * We boost the priority of a VCPU that is runnable but not
 	 * currently running, because it got preempted by something
@@ -1607,6 +1644,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 			continue;
 		if (waitqueue_active(&vcpu->wq))
 			continue;
+		if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
+			continue;
 		if (kvm_vcpu_yield_to(vcpu)) {
 			kvm->last_boosted_vcpu = i;
 			yielded = 1;
@@ -1614,6 +1653,10 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 			}
 		}
 	}
+	kvm_vcpu_set_in_spin_loop(me, false);
+
+	/* Ensure vcpu is not eligible during next spinloop */
+	kvm_vcpu_set_dy_eligible(me, false);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
 
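
kvm_vcpu_on_spin() now brackets its boost loop with the spin-loop bookkeeping that kvm_vcpu_eligible_for_directed_yield() reads: in_spin_loop is set on entry and cleared on exit, and dy_eligible is reset so the yielding VCPU is not itself a yield target on the next PLE exit. The accessors and the vcpu->spin_loop fields live in include/linux/kvm_host.h rather than in this virt/kvm diff; a sketch of what they presumably look like, compiling to no-ops when HAVE_KVM_CPU_RELAX_INTERCEPT is not selected:

    /* Sketch of the accessors assumed by the code above; the real
     * definitions are outside this diff (include/linux/kvm_host.h). */
    #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
    static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
    {
    	vcpu->spin_loop.in_spin_loop = val;
    }

    static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
    {
    	vcpu->spin_loop.dy_eligible = val;
    }
    #else
    static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
    {
    }

    static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
    {
    }
    #endif
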
@@ -2697,9 +2740,6 @@ static struct syscore_ops kvm_syscore_ops = {
 	.resume = kvm_resume,
 };
 
-struct page *bad_page;
-pfn_t bad_pfn;
-
 static inline
 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
 {