aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kvm
diff options
context:
space:
mode:
authorSuraj Jitindar Singh <sjitindarsingh@gmail.com>2016-08-02 00:03:21 -0400
committerPaul Mackerras <paulus@ozlabs.org>2016-09-07 22:21:45 -0400
commit0cda69dd7cd64fdd54bdf584b5d6ba53767ba422 (patch)
tree0628a38e0f0035b7cb262a9308a909f580ba6f71 /arch/powerpc/kvm
parent7b5f8272c792d49da73d98e9ca32f4cbb6d53808 (diff)
KVM: PPC: Book3S HV: Implement halt polling
This patch introduces new halt polling functionality into the kvm_hv kernel module. When a vcore is idle it will poll for some period of time before scheduling itself out. When all of the runnable vcpus on a vcore have ceded (and thus the vcore is idle) we schedule ourselves out to allow something else to run. In the event that we need to wake up very quickly (for example an interrupt arrives), we are required to wait until we get scheduled again. Implement halt polling so that when a vcore is idle, and before scheduling ourselves, we poll for vcpus in the runnable_threads list which have pending exceptions or which leave the ceded state. If we poll successfully then we can get back into the guest very quickly without ever scheduling ourselves, otherwise we schedule ourselves out as before. There exists generic halt_polling code in virt/kvm_main.c, however on powerpc the polling conditions are different to the generic case. It would be nice if we could just implement an arch specific kvm_check_block() function, but there are still other arch-specific things which need to be done for kvm_hv (for example manipulating vcore states) which means that a separate implementation is the best option. Testing of this patch with a TCP round robin test between two guests with virtio network interfaces has found a decrease in round trip time of ~15us on average. A performance gain is only seen when going out of and back into the guest often and quickly, otherwise there is no net benefit from the polling. The polling interval is adjusted such that when we are often scheduled out for long periods of time it is reduced, and when we often poll successfully it is increased. The rate at which the polling interval increases or decreases, and the maximum polling interval, can be set through module parameters. Based on the implementation in the generic kvm module by Wanpeng Li and Paolo Bonzini, and on direction from Paul Mackerras.
Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--arch/powerpc/kvm/book3s_hv.c116
-rw-r--r--arch/powerpc/kvm/trace_hv.h22
2 files changed, 124 insertions, 14 deletions
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ebbab1b2206c..3c85c3b28fc5 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -95,6 +95,23 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
95MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); 95MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
96#endif 96#endif
97 97
98/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */
99static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT;
100module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR);
101MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns");
102
103/* Factor by which the vcore halt poll interval is grown, default is to double
104 */
105static unsigned int halt_poll_ns_grow = 2;
106module_param(halt_poll_ns_grow, int, S_IRUGO);
107MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by");
108
109/* Factor by which the vcore halt poll interval is shrunk, default is to reset
110 */
111static unsigned int halt_poll_ns_shrink;
112module_param(halt_poll_ns_shrink, int, S_IRUGO);
113MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by");
114
98static void kvmppc_end_cede(struct kvm_vcpu *vcpu); 115static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
99static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); 116static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
100 117
@@ -2621,32 +2638,82 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
2621 finish_wait(&vcpu->arch.cpu_run, &wait); 2638 finish_wait(&vcpu->arch.cpu_run, &wait);
2622} 2639}
2623 2640
2641static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
2642{
2643 /* 10us base */
2644 if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
2645 vc->halt_poll_ns = 10000;
2646 else
2647 vc->halt_poll_ns *= halt_poll_ns_grow;
2648
2649 if (vc->halt_poll_ns > halt_poll_max_ns)
2650 vc->halt_poll_ns = halt_poll_max_ns;
2651}
2652
2653static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
2654{
2655 if (halt_poll_ns_shrink == 0)
2656 vc->halt_poll_ns = 0;
2657 else
2658 vc->halt_poll_ns /= halt_poll_ns_shrink;
2659}
2660
2661/* Check to see if any of the runnable vcpus on the vcore have pending
2662 * exceptions or are no longer ceded
2663 */
2664static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
2665{
2666 struct kvm_vcpu *vcpu;
2667 int i;
2668
2669 for_each_runnable_thread(i, vcpu, vc) {
2670 if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded)
2671 return 1;
2672 }
2673
2674 return 0;
2675}
2676
2624/* 2677/*
2625 * All the vcpus in this vcore are idle, so wait for a decrementer 2678 * All the vcpus in this vcore are idle, so wait for a decrementer
2626 * or external interrupt to one of the vcpus. vc->lock is held. 2679 * or external interrupt to one of the vcpus. vc->lock is held.
2627 */ 2680 */
2628static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) 2681static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
2629{ 2682{
2630 struct kvm_vcpu *vcpu; 2683 int do_sleep = 1;
2631 int do_sleep = 1, i; 2684 ktime_t cur, start;
2685 u64 block_ns;
2632 DECLARE_SWAITQUEUE(wait); 2686 DECLARE_SWAITQUEUE(wait);
2633 2687
2634 prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE); 2688 /* Poll for pending exceptions and ceded state */
2689 cur = start = ktime_get();
2690 if (vc->halt_poll_ns) {
2691 ktime_t stop = ktime_add_ns(start, vc->halt_poll_ns);
2635 2692
2636 /* 2693 vc->vcore_state = VCORE_POLLING;
2637 * Check one last time for pending exceptions and ceded state after 2694 spin_unlock(&vc->lock);
2638 * we put ourselves on the wait queue 2695
2639 */ 2696 do {
2640 for_each_runnable_thread(i, vcpu, vc) { 2697 if (kvmppc_vcore_check_block(vc)) {
2641 if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) { 2698 do_sleep = 0;
2642 do_sleep = 0; 2699 break;
2643 break; 2700 }
2644 } 2701 cur = ktime_get();
2702 } while (single_task_running() && ktime_before(cur, stop));
2703
2704 spin_lock(&vc->lock);
2705 vc->vcore_state = VCORE_INACTIVE;
2706
2707 if (!do_sleep)
2708 goto out;
2645 } 2709 }
2646 2710
2647 if (!do_sleep) { 2711 prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
2712
2713 if (kvmppc_vcore_check_block(vc)) {
2648 finish_swait(&vc->wq, &wait); 2714 finish_swait(&vc->wq, &wait);
2649 return; 2715 do_sleep = 0;
2716 goto out;
2650 } 2717 }
2651 2718
2652 vc->vcore_state = VCORE_SLEEPING; 2719 vc->vcore_state = VCORE_SLEEPING;
@@ -2657,6 +2724,27 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
2657 spin_lock(&vc->lock); 2724 spin_lock(&vc->lock);
2658 vc->vcore_state = VCORE_INACTIVE; 2725 vc->vcore_state = VCORE_INACTIVE;
2659 trace_kvmppc_vcore_blocked(vc, 1); 2726 trace_kvmppc_vcore_blocked(vc, 1);
2727
2728 cur = ktime_get();
2729
2730out:
2731 block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
2732
2733 /* Adjust poll time */
2734 if (halt_poll_max_ns) {
2735 if (block_ns <= vc->halt_poll_ns)
2736 ;
2737 /* We slept and blocked for longer than the max halt time */
2738 else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns)
2739 shrink_halt_poll_ns(vc);
2740 /* We slept and our poll time is too small */
2741 else if (vc->halt_poll_ns < halt_poll_max_ns &&
2742 block_ns < halt_poll_max_ns)
2743 grow_halt_poll_ns(vc);
2744 } else
2745 vc->halt_poll_ns = 0;
2746
2747 trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
2660} 2748}
2661 2749
2662static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 2750static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
index 33d9daff5783..fb21990c0fb4 100644
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -432,6 +432,28 @@ TRACE_EVENT(kvmppc_vcore_blocked,
432 __entry->runner_vcpu, __entry->n_runnable, __entry->tgid) 432 __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
433); 433);
434 434
435TRACE_EVENT(kvmppc_vcore_wakeup,
436 TP_PROTO(int do_sleep, __u64 ns),
437
438 TP_ARGS(do_sleep, ns),
439
440 TP_STRUCT__entry(
441 __field(__u64, ns)
442 __field(int, waited)
443 __field(pid_t, tgid)
444 ),
445
446 TP_fast_assign(
447 __entry->ns = ns;
448 __entry->waited = do_sleep;
449 __entry->tgid = current->tgid;
450 ),
451
452 TP_printk("%s time %lld ns, tgid=%d",
453 __entry->waited ? "wait" : "poll",
454 __entry->ns, __entry->tgid)
455);
456
435TRACE_EVENT(kvmppc_run_vcpu_enter, 457TRACE_EVENT(kvmppc_run_vcpu_enter,
436 TP_PROTO(struct kvm_vcpu *vcpu), 458 TP_PROTO(struct kvm_vcpu *vcpu),
437 459