aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Paolo Bonzini <pbonzini@redhat.com> 2015-02-04 12:20:58 -0500
committer Paolo Bonzini <pbonzini@redhat.com> 2015-02-06 07:08:37 -0500
commit f7819512996361280b86259222456fcf15aad926 (patch)
tree 2053c7f0a1acef58ba0987e1002b1ef18e048e4f
parent 1c2b364b225a5a93dbd1f317bd000d2fec2694be (diff)
kvm: add halt_poll_ns module parameter
This patch introduces a new module parameter for the KVM module; when it is set to a nonzero value, KVM attempts a bit of polling on every HLT before scheduling itself out via kvm_vcpu_block. This parameter helps a lot for latency-bound workloads---in particular I tested it with O_DSYNC writes with a battery-backed disk in the host. In this case, writes are fast (because the data doesn't have to go all the way to the platters) but they cannot be merged by either the host or the guest. KVM's performance here is usually around 30% of bare metal, or 50% if you use cache=directsync or cache=writethrough (these parameters keep the guest from sending pointless flush requests, and at the same time they are not slow because of the battery-backed cache). The bad performance happens because on every halt the host CPU decides to halt itself too. When the interrupt comes, the vCPU thread is then migrated to a new physical CPU, and in general the latency is horrible because the vCPU thread has to be scheduled back in. With this patch performance reaches 60-65% of bare metal and, more importantly, 99% of what you get if you use idle=poll in the guest. This means that the tunable gets rid of this particular bottleneck, and more work can be done to improve performance in the kernel or QEMU. Of course there is some price to pay; every time an otherwise idle vCPU is interrupted, it will poll unnecessarily and thus impose a little load on the host. The above results were obtained with a mostly random value of the parameter (500000 ns), and the load was around 1.5-2.5% CPU usage on one of the host's cores for each idle guest vCPU. The patch also adds a new stat, /sys/kernel/debug/kvm/halt_successful_poll, that can be used to tune the parameter. It counts how many HLT instructions received an interrupt during the polling period; each successful poll saves Linux from scheduling the vCPU thread out and back in, and may also avoid a likely trip to C1 and back for the physical CPU.
While the VM is idle, a 4-vCPU Linux VM halts around 10 times per second. Of these halts, almost all are failed polls. During the benchmark, instead, basically all halts end within the polling period, except a more or less constant stream of 50 per second coming from vCPUs that are not running the benchmark. The wasted time is thus very low. Things may be slightly different for Windows VMs, which have a ~10 ms timer tick. The effect is also visible on Marcelo's recently-introduced latency test for the TSC deadline timer. Though of course a non-RT kernel has awful latency bounds, the latency of the timer is around 8000-10000 clock cycles compared to 20000-120000 without setting halt_poll_ns. For the TSC deadline timer, thus, the effect is both a smaller average latency and a smaller variance. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r--arch/arm/include/asm/kvm_host.h1
-rw-r--r--arch/arm64/include/asm/kvm_host.h1
-rw-r--r--arch/mips/include/asm/kvm_host.h1
-rw-r--r--arch/mips/kvm/mips.c1
-rw-r--r--arch/powerpc/include/asm/kvm_host.h1
-rw-r--r--arch/powerpc/kvm/book3s.c1
-rw-r--r--arch/powerpc/kvm/booke.c1
-rw-r--r--arch/s390/include/asm/kvm_host.h1
-rw-r--r--arch/s390/kvm/kvm-s390.c1
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/kvm/x86.c1
-rw-r--r--include/trace/events/kvm.h19
-rw-r--r--virt/kvm/kvm_main.c48
13 files changed, 71 insertions, 7 deletions
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index bde494654bcc..6a79314bc1df 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -148,6 +148,7 @@ struct kvm_vm_stat {
148}; 148};
149 149
150struct kvm_vcpu_stat { 150struct kvm_vcpu_stat {
151 u32 halt_successful_poll;
151 u32 halt_wakeup; 152 u32 halt_wakeup;
152}; 153};
153 154
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2c49aa4ac818..8efde89613f2 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -165,6 +165,7 @@ struct kvm_vm_stat {
165}; 165};
166 166
167struct kvm_vcpu_stat { 167struct kvm_vcpu_stat {
168 u32 halt_successful_poll;
168 u32 halt_wakeup; 169 u32 halt_wakeup;
169}; 170};
170 171
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index f2c249796ea8..ac4fc716062b 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -120,6 +120,7 @@ struct kvm_vcpu_stat {
120 u32 resvd_inst_exits; 120 u32 resvd_inst_exits;
121 u32 break_inst_exits; 121 u32 break_inst_exits;
122 u32 flush_dcache_exits; 122 u32 flush_dcache_exits;
123 u32 halt_successful_poll;
123 u32 halt_wakeup; 124 u32 halt_wakeup;
124}; 125};
125 126
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index e97b90784031..c9eccf5df912 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -49,6 +49,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
49 { "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU }, 49 { "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU },
50 { "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU }, 50 { "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU },
51 { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, 51 { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU },
52 { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU },
52 { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU }, 53 { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU },
53 {NULL} 54 {NULL}
54}; 55};
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 7efd666a3fa7..8ef05121d3cd 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -107,6 +107,7 @@ struct kvm_vcpu_stat {
107 u32 emulated_inst_exits; 107 u32 emulated_inst_exits;
108 u32 dec_exits; 108 u32 dec_exits;
109 u32 ext_intr_exits; 109 u32 ext_intr_exits;
110 u32 halt_successful_poll;
110 u32 halt_wakeup; 111 u32 halt_wakeup;
111 u32 dbell_exits; 112 u32 dbell_exits;
112 u32 gdbell_exits; 113 u32 gdbell_exits;
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 888bf466d8c6..cfbcdc654201 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -52,6 +52,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
52 { "dec", VCPU_STAT(dec_exits) }, 52 { "dec", VCPU_STAT(dec_exits) },
53 { "ext_intr", VCPU_STAT(ext_intr_exits) }, 53 { "ext_intr", VCPU_STAT(ext_intr_exits) },
54 { "queue_intr", VCPU_STAT(queue_intr) }, 54 { "queue_intr", VCPU_STAT(queue_intr) },
55 { "halt_successful_poll", VCPU_STAT(halt_successful_poll), },
55 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 56 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
56 { "pf_storage", VCPU_STAT(pf_storage) }, 57 { "pf_storage", VCPU_STAT(pf_storage) },
57 { "sp_storage", VCPU_STAT(sp_storage) }, 58 { "sp_storage", VCPU_STAT(sp_storage) },
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 9b55dec2d6cc..6c1316a15a27 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -62,6 +62,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
62 { "inst_emu", VCPU_STAT(emulated_inst_exits) }, 62 { "inst_emu", VCPU_STAT(emulated_inst_exits) },
63 { "dec", VCPU_STAT(dec_exits) }, 63 { "dec", VCPU_STAT(dec_exits) },
64 { "ext_intr", VCPU_STAT(ext_intr_exits) }, 64 { "ext_intr", VCPU_STAT(ext_intr_exits) },
65 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
65 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 66 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
66 { "doorbell", VCPU_STAT(dbell_exits) }, 67 { "doorbell", VCPU_STAT(dbell_exits) },
67 { "guest doorbell", VCPU_STAT(gdbell_exits) }, 68 { "guest doorbell", VCPU_STAT(gdbell_exits) },
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index d1ecc7fd0579..f79058e3fd98 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -196,6 +196,7 @@ struct kvm_vcpu_stat {
196 u32 exit_stop_request; 196 u32 exit_stop_request;
197 u32 exit_validity; 197 u32 exit_validity;
198 u32 exit_instruction; 198 u32 exit_instruction;
199 u32 halt_successful_poll;
199 u32 halt_wakeup; 200 u32 halt_wakeup;
200 u32 instruction_lctl; 201 u32 instruction_lctl;
201 u32 instruction_lctlg; 202 u32 instruction_lctlg;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b2371c0fd1f8..1dbab2340a66 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -51,6 +51,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
51 { "exit_instruction", VCPU_STAT(exit_instruction) }, 51 { "exit_instruction", VCPU_STAT(exit_instruction) },
52 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, 52 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
53 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, 53 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
54 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
54 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 55 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
55 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, 56 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
56 { "instruction_lctl", VCPU_STAT(instruction_lctl) }, 57 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 848947ac6ade..a236e39cc385 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -655,6 +655,7 @@ struct kvm_vcpu_stat {
655 u32 irq_window_exits; 655 u32 irq_window_exits;
656 u32 nmi_window_exits; 656 u32 nmi_window_exits;
657 u32 halt_exits; 657 u32 halt_exits;
658 u32 halt_successful_poll;
658 u32 halt_wakeup; 659 u32 halt_wakeup;
659 u32 request_irq_exits; 660 u32 request_irq_exits;
660 u32 irq_exits; 661 u32 irq_exits;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1373e04e1f19..bd7a70be41b3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -145,6 +145,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
145 { "irq_window", VCPU_STAT(irq_window_exits) }, 145 { "irq_window", VCPU_STAT(irq_window_exits) },
146 { "nmi_window", VCPU_STAT(nmi_window_exits) }, 146 { "nmi_window", VCPU_STAT(nmi_window_exits) },
147 { "halt_exits", VCPU_STAT(halt_exits) }, 147 { "halt_exits", VCPU_STAT(halt_exits) },
148 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
148 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 149 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
149 { "hypercalls", VCPU_STAT(hypercalls) }, 150 { "hypercalls", VCPU_STAT(hypercalls) },
150 { "request_irq", VCPU_STAT(request_irq_exits) }, 151 { "request_irq", VCPU_STAT(request_irq_exits) },
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 6edf1f2028cd..6bfe7eec1c2c 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -37,6 +37,25 @@ TRACE_EVENT(kvm_userspace_exit,
37 __entry->errno < 0 ? -__entry->errno : __entry->reason) 37 __entry->errno < 0 ? -__entry->errno : __entry->reason)
38); 38);
39 39
40TRACE_EVENT(kvm_vcpu_wakeup,
41 TP_PROTO(__u64 ns, bool waited),
42 TP_ARGS(ns, waited),
43
44 TP_STRUCT__entry(
45 __field( __u64, ns )
46 __field( bool, waited )
47 ),
48
49 TP_fast_assign(
50 __entry->ns = ns;
51 __entry->waited = waited;
52 ),
53
54 TP_printk("%s time %lld ns",
55 __entry->waited ? "wait" : "poll",
56 __entry->ns)
57);
58
40#if defined(CONFIG_HAVE_KVM_IRQFD) 59#if defined(CONFIG_HAVE_KVM_IRQFD)
41TRACE_EVENT(kvm_set_irq, 60TRACE_EVENT(kvm_set_irq,
42 TP_PROTO(unsigned int gsi, int level, int irq_source_id), 61 TP_PROTO(unsigned int gsi, int level, int irq_source_id),
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0c281760a1c5..32449e0e9aa8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -66,6 +66,9 @@
66MODULE_AUTHOR("Qumranet"); 66MODULE_AUTHOR("Qumranet");
67MODULE_LICENSE("GPL"); 67MODULE_LICENSE("GPL");
68 68
69unsigned int halt_poll_ns = 0;
70module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
71
69/* 72/*
70 * Ordering of locks: 73 * Ordering of locks:
71 * 74 *
@@ -1813,29 +1816,60 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
1813} 1816}
1814EXPORT_SYMBOL_GPL(mark_page_dirty); 1817EXPORT_SYMBOL_GPL(mark_page_dirty);
1815 1818
1819static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
1820{
1821 if (kvm_arch_vcpu_runnable(vcpu)) {
1822 kvm_make_request(KVM_REQ_UNHALT, vcpu);
1823 return -EINTR;
1824 }
1825 if (kvm_cpu_has_pending_timer(vcpu))
1826 return -EINTR;
1827 if (signal_pending(current))
1828 return -EINTR;
1829
1830 return 0;
1831}
1832
1816/* 1833/*
1817 * The vCPU has executed a HLT instruction with in-kernel mode enabled. 1834 * The vCPU has executed a HLT instruction with in-kernel mode enabled.
1818 */ 1835 */
1819void kvm_vcpu_block(struct kvm_vcpu *vcpu) 1836void kvm_vcpu_block(struct kvm_vcpu *vcpu)
1820{ 1837{
1838 ktime_t start, cur;
1821 DEFINE_WAIT(wait); 1839 DEFINE_WAIT(wait);
1840 bool waited = false;
1841
1842 start = cur = ktime_get();
1843 if (halt_poll_ns) {
1844 ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
1845 do {
1846 /*
1847 * This sets KVM_REQ_UNHALT if an interrupt
1848 * arrives.
1849 */
1850 if (kvm_vcpu_check_block(vcpu) < 0) {
1851 ++vcpu->stat.halt_successful_poll;
1852 goto out;
1853 }
1854 cur = ktime_get();
1855 } while (single_task_running() && ktime_before(cur, stop));
1856 }
1822 1857
1823 for (;;) { 1858 for (;;) {
1824 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); 1859 prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
1825 1860
1826 if (kvm_arch_vcpu_runnable(vcpu)) { 1861 if (kvm_vcpu_check_block(vcpu) < 0)
1827 kvm_make_request(KVM_REQ_UNHALT, vcpu);
1828 break;
1829 }
1830 if (kvm_cpu_has_pending_timer(vcpu))
1831 break;
1832 if (signal_pending(current))
1833 break; 1862 break;
1834 1863
1864 waited = true;
1835 schedule(); 1865 schedule();
1836 } 1866 }
1837 1867
1838 finish_wait(&vcpu->wq, &wait); 1868 finish_wait(&vcpu->wq, &wait);
1869 cur = ktime_get();
1870
1871out:
1872 trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited);
1839} 1873}
1840EXPORT_SYMBOL_GPL(kvm_vcpu_block); 1874EXPORT_SYMBOL_GPL(kvm_vcpu_block);
1841 1875