From c54cdf141c40a5115774e91fc947c34e91df0259 Mon Sep 17 00:00:00 2001 From: Liang Chen Date: Wed, 16 Mar 2016 19:33:16 +0800 Subject: KVM: x86: optimize steal time calculation Since accumulate_steal_time is now only called in record_steal_time, it doesn't quite make sense to put the delta calculation in a separate function. The function could be called thousands of times before guest enables the steal time MSR (though the compiler may optimize out this function call). And after it's enabled, the MSR enable bit is tested twice every time. Removing the accumulate_steal_time function also avoids the necessity of having the accum_steal field. Signed-off-by: Liang Chen Signed-off-by: Gavin Guo Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9b7798c7b210..96e81d284deb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2002,22 +2002,8 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu) vcpu->arch.pv_time_enabled = false; } -static void accumulate_steal_time(struct kvm_vcpu *vcpu) -{ - u64 delta; - - if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) - return; - - delta = current->sched_info.run_delay - vcpu->arch.st.last_steal; - vcpu->arch.st.last_steal = current->sched_info.run_delay; - vcpu->arch.st.accum_steal = delta; -} - static void record_steal_time(struct kvm_vcpu *vcpu) { - accumulate_steal_time(vcpu); - if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; @@ -2025,9 +2011,10 @@ static void record_steal_time(struct kvm_vcpu *vcpu) &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) return; - vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal; + vcpu->arch.st.steal.steal += current->sched_info.run_delay - + vcpu->arch.st.last_steal; + vcpu->arch.st.last_steal = current->sched_info.run_delay; vcpu->arch.st.steal.version += 2; - vcpu->arch.st.accum_steal = 0; kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); -- cgit v1.2.2 From 35f3fae17849793c7c031b00ec972f3d9057744b Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 3 May 2016 11:43:10 +0800 Subject: kvm: robustify steal time record MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Guest should only trust data to be valid when version haven't changed before and after reads of steal time. Besides not changing, it has to be an even number. Hypervisor may write an odd number to version field to indicate that an update is in progress. kvm_steal_clock() in guest has already done the read side, make write side in hypervisor more robust by following the above rule. Reviewed-by: Wincy Van Signed-off-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 96e81d284deb..57a62d1e2f5d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2011,10 +2011,26 @@ static void record_steal_time(struct kvm_vcpu *vcpu) &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) return; + if (vcpu->arch.st.steal.version & 1) + vcpu->arch.st.steal.version += 1; /* first time write, random junk */ + + vcpu->arch.st.steal.version += 1; + + kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, + &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); + + smp_wmb(); + vcpu->arch.st.steal.steal += current->sched_info.run_delay - vcpu->arch.st.last_steal; vcpu->arch.st.last_steal = current->sched_info.run_delay; - vcpu->arch.st.steal.version += 2; + + kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, + &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); + + smp_wmb(); + + vcpu->arch.st.steal.version += 1; kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); -- cgit v1.2.2 From 14717e2031862d9aa2512b24a7df42cf68a977ec Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 5 May 2016 11:58:35 -0600 Subject: kvm: Conditionally register IRQ bypass consumer If we don't support a mechanism for bypassing IRQs, don't register as a consumer. This eliminates meaningless dev_info()s when the connect fails between producer and consumer, such as on AMD systems where kvm_x86_ops->update_pi_irte is not implemented Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 57a62d1e2f5d..6c774cdf553c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8358,19 +8358,21 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); +bool kvm_arch_has_irq_bypass(void) +{ + return kvm_x86_ops->update_pi_irte != NULL; +} + int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, struct irq_bypass_producer *prod) { struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); - if (kvm_x86_ops->update_pi_irte) { - irqfd->producer = prod; - return kvm_x86_ops->update_pi_irte(irqfd->kvm, - prod->irq, irqfd->gsi, 1); - } + irqfd->producer = prod; - return -EINVAL; + return kvm_x86_ops->update_pi_irte(irqfd->kvm, + prod->irq, irqfd->gsi, 1); } void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, @@ -8380,11 +8382,6 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); - if (!kvm_x86_ops->update_pi_irte) { - WARN_ON(irqfd->producer != NULL); - return; - } - WARN_ON(irqfd->producer != prod); irqfd->producer = NULL; -- cgit v1.2.2 From 3491caf2755e9f312666712510d80b00c81ff247 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 13 May 2016 12:16:35 +0200 Subject: KVM: halt_polling: provide a way to qualify wakeups during poll MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some wakeups should not be considered a sucessful poll. For example on s390 I/O interrupts are usually floating, which means that _ALL_ CPUs would be considered runnable - letting all vCPUs poll all the time for transactional like workload, even if one vCPU would be enough. This can result in huge CPU usage for large guests. This patch lets architectures provide a way to qualify wakeups if they should be considered a good/bad wakeups in regard to polls. For s390 the implementation will fence of halt polling for anything but known good, single vCPU events. The s390 implementation for floating interrupts does a wakeup for one vCPU, but the interrupt will be delivered by whatever CPU checks first for a pending interrupt. We prefer the woken up CPU by marking the poll of this CPU as "good" poll. This code will also mark several other wakeup reasons like IPI or expired timers as "good". This will of course also mark some events as not sucessful. As KVM on z runs always as a 2nd level hypervisor, we prefer to not poll, unless we are really sure, though. This patch successfully limits the CPU usage for cases like uperf 1byte transactional ping pong workload or wakeup heavy workload like OLTP while still providing a proper speedup. This also introduced a new vcpu stat "halt_poll_no_tuning" that marks wakeups that are considered not good for polling. Signed-off-by: Christian Borntraeger Acked-by: Radim Krčmář (for an earlier version) Cc: David Matlack Cc: Wanpeng Li [Rename config symbol. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c774cdf553c..bcef92fc41d8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -161,6 +161,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "halt_exits", VCPU_STAT(halt_exits) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, + { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "hypercalls", VCPU_STAT(hypercalls) }, { "request_irq", VCPU_STAT(request_irq_exits) }, -- cgit v1.2.2 From 03543133cea646406307870823343912c1ef0a3a Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 4 May 2016 14:09:42 -0500 Subject: KVM: x86: Introducing kvm_x86_ops VM init/destroy hooks Adding function pointers in struct kvm_x86_ops for processor-specific layer to provide hooks for when KVM initialize and destroy VM. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bcef92fc41d8..7fcaed49b3c5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7756,6 +7756,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_page_track_init(kvm); kvm_mmu_init_vm(kvm); + if (kvm_x86_ops->vm_init) + return kvm_x86_ops->vm_init(kvm); + return 0; } @@ -7877,6 +7880,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0); x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0); } + if (kvm_x86_ops->vm_destroy) + kvm_x86_ops->vm_destroy(kvm); kvm_iommu_unmap_guest(kvm); kfree(kvm->arch.vpic); kfree(kvm->arch.vioapic); -- cgit v1.2.2 From 18f40c53e10f8d1267dc47cce4487664eececd6d Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 4 May 2016 14:09:48 -0500 Subject: svm: Add VMEXIT handlers for AVIC This patch introduces VMEXIT handlers, avic_incomplete_ipi_interception() and avic_unaccelerated_access_interception() along with two trace points (trace_kvm_avic_incomplete_ipi and trace_kvm_avic_unaccelerated_access). Signed-off-by: Suravee Suthikulpanit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kvm/x86.c') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7fcaed49b3c5..a8c7ca34ee5d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8435,3 +8435,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi); -- cgit v1.2.2