From 92eca8faad2d1b136c939bc122842dcdabd6ff46 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Sun, 20 May 2012 13:13:28 +0900 Subject: KVM: Separate out dirty_bitmap allocation code as kvm_kvzalloc() Will be used for lpage_info allocation later. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7e140683ff14..1148c96a4817 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -516,16 +516,32 @@ out_err_nodisable: return ERR_PTR(r); } +/* + * Avoid using vmalloc for a small buffer. + * Should not be used when the size is statically known. + */ +static void *kvm_kvzalloc(unsigned long size) +{ + if (size > PAGE_SIZE) + return vzalloc(size); + else + return kzalloc(size, GFP_KERNEL); +} + +static void kvm_kvfree(const void *addr) +{ + if (is_vmalloc_addr(addr)) + vfree(addr); + else + kfree(addr); +} + static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) { if (!memslot->dirty_bitmap) return; - if (2 * kvm_dirty_bitmap_bytes(memslot) > PAGE_SIZE) - vfree(memslot->dirty_bitmap); - else - kfree(memslot->dirty_bitmap); - + kvm_kvfree(memslot->dirty_bitmap); memslot->dirty_bitmap = NULL; } @@ -617,11 +633,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) #ifndef CONFIG_S390 unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot); - if (dirty_bytes > PAGE_SIZE) - memslot->dirty_bitmap = vzalloc(dirty_bytes); - else - memslot->dirty_bitmap = kzalloc(dirty_bytes, GFP_KERNEL); - + memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes); if (!memslot->dirty_bitmap) return -ENOMEM; -- cgit v1.2.2 From c1a7b32a14138f908df52d7c53b5ce3415ec6b50 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Sun, 20 May 2012 13:15:07 +0900 Subject: KVM: Avoid wasting pages for small lpage_info arrays lpage_info is created for each large level even when the memory slot is not for RAM. This means that when we add one slot for a PCI device, we end up allocating at least KVM_NR_PAGE_SIZES - 1 pages by vmalloc(). To make things worse, there is an increasing number of devices which would result in more pages being wasted this way. This patch mitigates this problem by using kvm_kvzalloc(). Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1148c96a4817..02cb440f802d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -520,7 +520,7 @@ out_err_nodisable: * Avoid using vmalloc for a small buffer. * Should not be used when the size is statically known. */ -static void *kvm_kvzalloc(unsigned long size) +void *kvm_kvzalloc(unsigned long size) { if (size > PAGE_SIZE) return vzalloc(size); @@ -528,7 +528,7 @@ static void *kvm_kvzalloc(unsigned long size) return kzalloc(size, GFP_KERNEL); } -static void kvm_kvfree(const void *addr) +void kvm_kvfree(const void *addr) { if (is_vmalloc_addr(addr)) vfree(addr); -- cgit v1.2.2 From 9900b4b48b095895cf962054e45aafa49ef70f74 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 15 Jun 2012 15:07:02 -0400 Subject: KVM: use KVM_CAP_IRQ_ROUTING to protect the routing related code The KVM code sometimes uses CONFIG_HAVE_KVM_IRQCHIP to protect code that is related to IRQ routing, which not all in-kernel irqchips may support. Use KVM_CAP_IRQ_ROUTING instead. Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 02cb440f802d..636bd08bb399 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2225,7 +2225,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) case KVM_CAP_SIGNAL_MSI: #endif return 1; -#ifdef CONFIG_HAVE_KVM_IRQCHIP +#ifdef KVM_CAP_IRQ_ROUTING case KVM_CAP_IRQ_ROUTING: return KVM_MAX_IRQ_ROUTES; #endif -- cgit v1.2.2 From 5cfc2aabcb282f4554e7086c9893b386ad6ba9d4 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 19 Jun 2012 16:51:04 -0400 Subject: KVM: handle last_boosted_vcpu = 0 case If last_boosted_vcpu == 0, then we fall through all test cases and may end up with all VCPUs pouncing on vcpu 0. With a large enough guest, this can result in enormous runqueue lock contention, which can prevent vcpu0 from running, leading to a livelock. Changing < to <= makes sure we properly handle that case. Signed-off-by: Rik van Riel Signed-off-by: Marcelo Tosatti --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 636bd08bb399..b3ce91c623e2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1598,7 +1598,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) */ for (pass = 0; pass < 2 && !yielded; pass++) { kvm_for_each_vcpu(i, vcpu, kvm) { - if (!pass && i < last_boosted_vcpu) { + if (!pass && i <= last_boosted_vcpu) { i = last_boosted_vcpu; continue; } else if (pass && i > last_boosted_vcpu) -- cgit v1.2.2 From a76beb14123a69ca080f5a5425e28b786d62318d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 9 Jul 2012 10:53:22 -0600 Subject: KVM: Fix device assignment threaded irq handler The kernel no longer allows us to pass NULL for the hard handler without also specifying IRQF_ONESHOT. IRQF_ONESHOT imposes latency in the exit path that we don't need for MSI interrupts. Long term we'd like to inject these interrupts from the hard handler when possible. In the short term, we can create dummy hard handlers that return us to the previous behavior. Credit to Michael for original patch. Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=43328 Signed-off-by: Michael S. Tsirkin Signed-off-by: Alex Williamson Signed-off-by: Avi Kivity --- virt/kvm/assigned-dev.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index b1e091ae2f37..23a41a9f8db9 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -334,6 +334,11 @@ static int assigned_device_enable_host_intx(struct kvm *kvm, } #ifdef __KVM_HAVE_MSI +static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id) +{ + return IRQ_WAKE_THREAD; +} + static int assigned_device_enable_host_msi(struct kvm *kvm, struct kvm_assigned_dev_kernel *dev) { @@ -346,7 +351,7 @@ static int assigned_device_enable_host_msi(struct kvm *kvm, } dev->host_irq = dev->dev->irq; - if (request_threaded_irq(dev->host_irq, NULL, + if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi, kvm_assigned_dev_thread_msi, 0, dev->irq_name, dev)) { pci_disable_msi(dev->dev); @@ -358,6 +363,11 @@ static int assigned_device_enable_host_msi(struct kvm *kvm, #endif #ifdef __KVM_HAVE_MSIX +static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id) +{ + return IRQ_WAKE_THREAD; +} + static int assigned_device_enable_host_msix(struct kvm *kvm, struct kvm_assigned_dev_kernel *dev) { @@ -374,7 +384,8 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, for (i = 0; i < dev->entries_nr; i++) { r = request_threaded_irq(dev->host_msix_entries[i].vector, - NULL, kvm_assigned_dev_thread_msix, + kvm_assigned_dev_msix, + kvm_assigned_dev_thread_msix, 0, dev->irq_name, dev); if (r) goto err; -- cgit v1.2.2 From 1a577b72475d161b6677c05abe57301362023bb2 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 19 Jul 2012 13:45:20 +0300 Subject: KVM: fix race with level interrupts When more than 1 source id is in use for the same GSI, we have the following race related to handling irq_states race: CPU 0 clears bit 0. CPU 0 read irq_state as 0. CPU 1 sets level to 1. CPU 1 calls kvm_ioapic_set_irq(1). CPU 0 calls kvm_ioapic_set_irq(0). Now ioapic thinks the level is 0 but irq_state is not 0. Fix by performing all irq_states bitmap handling under pic/ioapic lock. This also removes the need for atomics with irq_states handling. Reported-by: Gleb Natapov Signed-off-by: Michael S. Tsirkin Signed-off-by: Marcelo Tosatti --- virt/kvm/ioapic.c | 19 ++++++++++++++++--- virt/kvm/ioapic.h | 4 +++- virt/kvm/irq_comm.c | 31 ++++--------------------------- 3 files changed, 23 insertions(+), 31 deletions(-) (limited to 'virt') diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 26fd54dc459e..ef61d529a6c4 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -191,7 +191,8 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); } -int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) +int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, + int level) { u32 old_irr; u32 mask = 1 << irq; @@ -201,9 +202,11 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) spin_lock(&ioapic->lock); old_irr = ioapic->irr; if (irq >= 0 && irq < IOAPIC_NUM_PINS) { + int irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq], + irq_source_id, level); entry = ioapic->redirtbl[irq]; - level ^= entry.fields.polarity; - if (!level) + irq_level ^= entry.fields.polarity; + if (!irq_level) ioapic->irr &= ~mask; else { int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); @@ -221,6 +224,16 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) return ret; } +void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id) +{ + int i; + + spin_lock(&ioapic->lock); + for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) + __clear_bit(irq_source_id, &ioapic->irq_states[i]); + spin_unlock(&ioapic->lock); +} + static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, int trigger_mode) { diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 32872a09b63f..a30abfe6ed16 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -74,7 +74,9 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_destroy(struct kvm *kvm); -int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); +int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, + int level); +void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a6a0365475ed..cc59c68da032 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -33,26 +33,12 @@ #include "ioapic.h" -static inline int kvm_irq_line_state(unsigned long *irq_state, - int irq_source_id, int level) -{ - /* Logical OR for level trig interrupt */ - if (level) - set_bit(irq_source_id, irq_state); - else - clear_bit(irq_source_id, irq_state); - - return !!(*irq_state); -} - static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level) { #ifdef CONFIG_X86 struct kvm_pic *pic = pic_irqchip(kvm); - level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin], - irq_source_id, level); - return kvm_pic_set_irq(pic, e->irqchip.pin, level); + return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); #else return -1; #endif @@ -62,10 +48,7 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; - level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin], - irq_source_id, level); - - return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level); + return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level); } inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) @@ -249,8 +232,6 @@ unlock: void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) { - int i; - ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); mutex_lock(&kvm->irq_lock); @@ -263,14 +244,10 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) if (!irqchip_in_kernel(kvm)) goto unlock; - for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) { - clear_bit(irq_source_id, &kvm->arch.vioapic->irq_states[i]); - if (i >= 16) - continue; + kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); #ifdef CONFIG_X86 - clear_bit(irq_source_id, &pic_irqchip(kvm)->irq_states[i]); + kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id); #endif - } unlock: mutex_unlock(&kvm->irq_lock); } -- cgit v1.2.2