diff options
author | Radim Krčmář <rkrcmar@redhat.com> | 2014-08-21 12:08:08 -0400 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2014-08-21 12:45:23 -0400 |
commit | b4a2d31da812ce03efaf5d30c6b9d39c1cbd18d8 (patch) | |
tree | e4596441b650badb49e4f46d75465d223e2dd9c4 | |
parent | a7653ecdf34c68a1af4fc085511afcf7ff011903 (diff) |
KVM: VMX: dynamise PLE window
Window is increased on every PLE exit and decreased on every sched_in.
The idea is that we don't want to PLE exit if there is no preemption
going on.
We do this with sched_in() because it does not hold rq lock.
There are two new kernel parameters for changing the window:
ple_window_grow and ple_window_shrink
ple_window_grow affects the window on PLE exit and ple_window_shrink
does it on sched_in; depending on their value, the window is modified
like this: (ple_window is kvm_intel's global)
ple_window_shrink/ |
ple_window_grow | PLE exit | sched_in
-------------------+--------------------+---------------------
< 1 | = ple_window | = ple_window
< ple_window | *= ple_window_grow | /= ple_window_shrink
otherwise | += ple_window_grow | -= ple_window_shrink
A third new parameter, ple_window_max, controls the maximal ple_window;
it is internally rounded down to the closest multiple of ple_window_grow.
VCPU's PLE window is never allowed below ple_window.
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r-- | arch/x86/kvm/vmx.c | 95 |
1 files changed, 93 insertions, 2 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 70175990536b..baeac7f580a7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -125,14 +125,32 @@ module_param(nested, bool, S_IRUGO); | |||
125 | * Time is measured based on a counter that runs at the same rate as the TSC, | 125 | * Time is measured based on a counter that runs at the same rate as the TSC, |
126 | * refer SDM volume 3b section 21.6.13 & 22.1.3. | 126 | * refer SDM volume 3b section 21.6.13 & 22.1.3. |
127 | */ | 127 | */ |
128 | #define KVM_VMX_DEFAULT_PLE_GAP 128 | 128 | #define KVM_VMX_DEFAULT_PLE_GAP 128 |
129 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 | 129 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 |
130 | #define KVM_VMX_DEFAULT_PLE_WINDOW_GROW 2 | ||
131 | #define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0 | ||
132 | #define KVM_VMX_DEFAULT_PLE_WINDOW_MAX \ | ||
133 | INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW | ||
134 | |||
130 | static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; | 135 | static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; |
131 | module_param(ple_gap, int, S_IRUGO); | 136 | module_param(ple_gap, int, S_IRUGO); |
132 | 137 | ||
133 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; | 138 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; |
134 | module_param(ple_window, int, S_IRUGO); | 139 | module_param(ple_window, int, S_IRUGO); |
135 | 140 | ||
141 | /* Default doubles per-vcpu window every exit. */ | ||
142 | static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW; | ||
143 | module_param(ple_window_grow, int, S_IRUGO); | ||
144 | |||
145 | /* Default resets per-vcpu window every sched_in to ple_window. */ | ||
146 | static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK; | ||
147 | module_param(ple_window_shrink, int, S_IRUGO); | ||
148 | |||
149 | /* Default is to compute the maximum so we can never overflow. */ | ||
150 | static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; | ||
151 | static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; | ||
152 | module_param(ple_window_max, int, S_IRUGO); | ||
153 | |||
136 | extern const ulong vmx_return; | 154 | extern const ulong vmx_return; |
137 | 155 | ||
138 | #define NR_AUTOLOAD_MSRS 8 | 156 | #define NR_AUTOLOAD_MSRS 8 |
@@ -5683,12 +5701,81 @@ out: | |||
5683 | return ret; | 5701 | return ret; |
5684 | } | 5702 | } |
5685 | 5703 | ||
5704 | static int __grow_ple_window(int val) | ||
5705 | { | ||
5706 | if (ple_window_grow < 1) | ||
5707 | return ple_window; | ||
5708 | |||
5709 | val = min(val, ple_window_actual_max); | ||
5710 | |||
5711 | if (ple_window_grow < ple_window) | ||
5712 | val *= ple_window_grow; | ||
5713 | else | ||
5714 | val += ple_window_grow; | ||
5715 | |||
5716 | return val; | ||
5717 | } | ||
5718 | |||
5719 | static int __shrink_ple_window(int val, int modifier, int minimum) | ||
5720 | { | ||
5721 | if (modifier < 1) | ||
5722 | return ple_window; | ||
5723 | |||
5724 | if (modifier < ple_window) | ||
5725 | val /= modifier; | ||
5726 | else | ||
5727 | val -= modifier; | ||
5728 | |||
5729 | return max(val, minimum); | ||
5730 | } | ||
5731 | |||
5732 | static void grow_ple_window(struct kvm_vcpu *vcpu) | ||
5733 | { | ||
5734 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
5735 | int old = vmx->ple_window; | ||
5736 | |||
5737 | vmx->ple_window = __grow_ple_window(old); | ||
5738 | |||
5739 | if (vmx->ple_window != old) | ||
5740 | vmx->ple_window_dirty = true; | ||
5741 | } | ||
5742 | |||
5743 | static void shrink_ple_window(struct kvm_vcpu *vcpu) | ||
5744 | { | ||
5745 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
5746 | int old = vmx->ple_window; | ||
5747 | |||
5748 | vmx->ple_window = __shrink_ple_window(old, | ||
5749 | ple_window_shrink, ple_window); | ||
5750 | |||
5751 | if (vmx->ple_window != old) | ||
5752 | vmx->ple_window_dirty = true; | ||
5753 | } | ||
5754 | |||
5755 | /* | ||
5756 | * ple_window_actual_max is computed to be one grow_ple_window() below | ||
5757 | * ple_window_max. (See __grow_ple_window for the reason.) | ||
5758 | * This prevents overflows, because ple_window_max is int. | ||
5759 | * ple_window_max is effectively rounded down to a multiple of ple_window_grow in | ||
5760 | * this process. | ||
5761 | * ple_window_max is also prevented from setting vmx->ple_window < ple_window. | ||
5762 | */ | ||
5763 | static void update_ple_window_actual_max(void) | ||
5764 | { | ||
5765 | ple_window_actual_max = | ||
5766 | __shrink_ple_window(max(ple_window_max, ple_window), | ||
5767 | ple_window_grow, INT_MIN); | ||
5768 | } | ||
5769 | |||
5686 | /* | 5770 | /* |
5687 | * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE | 5771 | * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE |
5688 | * exiting, so only get here on cpu with PAUSE-Loop-Exiting. | 5772 | * exiting, so only get here on cpu with PAUSE-Loop-Exiting. |
5689 | */ | 5773 | */ |
5690 | static int handle_pause(struct kvm_vcpu *vcpu) | 5774 | static int handle_pause(struct kvm_vcpu *vcpu) |
5691 | { | 5775 | { |
5776 | if (ple_gap) | ||
5777 | grow_ple_window(vcpu); | ||
5778 | |||
5692 | skip_emulated_instruction(vcpu); | 5779 | skip_emulated_instruction(vcpu); |
5693 | kvm_vcpu_on_spin(vcpu); | 5780 | kvm_vcpu_on_spin(vcpu); |
5694 | 5781 | ||
@@ -8860,6 +8947,8 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, | |||
8860 | 8947 | ||
8861 | void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) | 8948 | void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) |
8862 | { | 8949 | { |
8950 | if (ple_gap) | ||
8951 | shrink_ple_window(vcpu); | ||
8863 | } | 8952 | } |
8864 | 8953 | ||
8865 | static struct kvm_x86_ops vmx_x86_ops = { | 8954 | static struct kvm_x86_ops vmx_x86_ops = { |
@@ -9082,6 +9171,8 @@ static int __init vmx_init(void) | |||
9082 | } else | 9171 | } else |
9083 | kvm_disable_tdp(); | 9172 | kvm_disable_tdp(); |
9084 | 9173 | ||
9174 | update_ple_window_actual_max(); | ||
9175 | |||
9085 | return 0; | 9176 | return 0; |
9086 | 9177 | ||
9087 | out7: | 9178 | out7: |