KVM: VMX: dynamise PLE window

Window is increased on every PLE exit and decreased on every sched_in.
The idea is that we don't want to PLE exit if there is no preemption
going on.
We do this with sched_in() because it does not hold rq lock.

There are two new kernel parameters for changing the window:
ple_window_grow and ple_window_shrink.
ple_window_grow affects the window on PLE exit and ple_window_shrink
does it on sched_in; depending on their value, the window is modified
like this: (ple_window is kvm_intel's global)

  ple_window_shrink/ |
  ple_window_grow    | PLE exit           | sched_in
  -------------------+--------------------+---------------------
  < 1                |  = ple_window      |  = ple_window
  < ple_window       | *= ple_window_grow | /= ple_window_shrink
  otherwise          | += ple_window_grow | -= ple_window_shrink

A third new parameter, ple_window_max, controls the maximal ple_window;
it is internally rounded down to the closest multiple of ple_window_grow.

VCPU's PLE window is never allowed below ple_window.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
author: Radim Krčmář <rkrcmar@redhat.com> 2014-08-21 12:08:08 -0400
committer: Paolo Bonzini <pbonzini@redhat.com> 2014-08-21 12:45:23 -0400
commit: b4a2d31da812ce03efaf5d30c6b9d39c1cbd18d8 (patch)
tree: e4596441b650badb49e4f46d75465d223e2dd9c4
parent: a7653ecdf34c68a1af4fc085511afcf7ff011903 (diff)
1 files changed, 93 insertions, 2 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 70175990536b..baeac7f580a7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -125,14 +125,32 @@ module_param(nested, bool, S_IRUGO);
 * Time is measured based on a counter that runs at the same rate as the TSC,
 * refer SDM volume 3b section 21.6.13 & 22.1.3.
 */
-#define KVM_VMX_DEFAULT_PLE_GAP    128
+#define KVM_VMX_DEFAULT_PLE_GAP           128
-#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
+#define KVM_VMX_DEFAULT_PLE_WINDOW        4096
+#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW   2
+#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
+#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX    \
+                INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW
 static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
 module_param(ple_gap, int, S_IRUGO);
 static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
 module_param(ple_window, int, S_IRUGO);
+/* Default doubles per-vcpu window every exit. */
+static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
+module_param(ple_window_grow, int, S_IRUGO);
+/* Default resets per-vcpu window to ple_window on every sched_in. */
+static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
+module_param(ple_window_shrink, int, S_IRUGO);
+/* Default is to compute the maximum so we can never overflow. */
+static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
+static int ple_window_max        = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
+module_param(ple_window_max, int, S_IRUGO);
 extern const ulong vmx_return;
 #define NR_AUTOLOAD_MSRS 8
@@ -5683,12 +5701,81 @@ out:
        return ret;
 }
+static int __grow_ple_window(int val)
+{
+        if (ple_window_grow < 1)
+                return ple_window;
+        val = min(val, ple_window_actual_max);
+        if (ple_window_grow < ple_window)
+                val *= ple_window_grow;
+        else
+                val += ple_window_grow;
+        return val;
+}
+static int __shrink_ple_window(int val, int modifier, int minimum)
+{
+        if (modifier < 1)
+                return ple_window;
+        if (modifier < ple_window)
+                val /= modifier;
+        else
+                val -= modifier;
+        return max(val, minimum);
+}
+static void grow_ple_window(struct kvm_vcpu *vcpu)
+{
+        struct vcpu_vmx *vmx = to_vmx(vcpu);
+        int old = vmx->ple_window;
+        vmx->ple_window = __grow_ple_window(old);
+        if (vmx->ple_window != old)
+                vmx->ple_window_dirty = true;
+}
+static void shrink_ple_window(struct kvm_vcpu *vcpu)
+{
+        struct vcpu_vmx *vmx = to_vmx(vcpu);
+        int old = vmx->ple_window;
+        vmx->ple_window = __shrink_ple_window(old,
+                                              ple_window_shrink, ple_window);
+        if (vmx->ple_window != old)
+                vmx->ple_window_dirty = true;
+}
+/*
+ * ple_window_actual_max is computed to be one grow_ple_window() below
+ * ple_window_max. (See __grow_ple_window for the reason.)
+ * This prevents overflows, because ple_window_max is int.
+ * ple_window_max effectively rounded down to a multiple of ple_window_grow in
+ * this process.
+ * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
+ */
+static void update_ple_window_actual_max(void)
+{
+        ple_window_actual_max =
+                        __shrink_ple_window(max(ple_window_max, ple_window),
+                                            ple_window_grow, INT_MIN);
+}
 /*
 * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
 * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
 */
 static int handle_pause(struct kvm_vcpu *vcpu)
 {
+        if (ple_gap)
+                grow_ple_window(vcpu);
        skip_emulated_instruction(vcpu);
        kvm_vcpu_on_spin(vcpu);
@@ -8860,6 +8947,8 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
 void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
+        if (ple_gap)
+                shrink_ple_window(vcpu);
 }
 static struct kvm_x86_ops vmx_x86_ops = {
@@ -9082,6 +9171,8 @@ static int __init vmx_init(void)
        } else
                kvm_disable_tdp();
+        update_ple_window_actual_max();
        return 0;
 out7:
author	Radim Krčmář <rkrcmar@redhat.com>	2014-08-21 12:08:08 -0400
committer	Paolo Bonzini <pbonzini@redhat.com>	2014-08-21 12:45:23 -0400
commit	b4a2d31da812ce03efaf5d30c6b9d39c1cbd18d8 (patch)
tree	e4596441b650badb49e4f46d75465d223e2dd9c4
parent	a7653ecdf34c68a1af4fc085511afcf7ff011903 (diff)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 70175990536b..baeac7f580a7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c
@@ -125,14 +125,32 @@ module_param(nested, bool, S_IRUGO);
125	* Time is measured based on a counter that runs at the same rate as the TSC,	125	* Time is measured based on a counter that runs at the same rate as the TSC,
126	* refer SDM volume 3b section 21.6.13 & 22.1.3.	126	* refer SDM volume 3b section 21.6.13 & 22.1.3.
127	*/	127	*/
128	#define KVM_VMX_DEFAULT_PLE_GAP 128	128	#define KVM_VMX_DEFAULT_PLE_GAP 128
129	#define KVM_VMX_DEFAULT_PLE_WINDOW 4096	129	#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
		130	#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW 2
		131	#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
		132	#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX \
		133	INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW
		134
130	static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;	135	static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
131	module_param(ple_gap, int, S_IRUGO);	136	module_param(ple_gap, int, S_IRUGO);
132		137
133	static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;	138	static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
134	module_param(ple_window, int, S_IRUGO);	139	module_param(ple_window, int, S_IRUGO);
135		140
		141	/* Default doubles per-vcpu window every exit. */
		142	static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
		143	module_param(ple_window_grow, int, S_IRUGO);
		144
		145	/* Default resets per-vcpu window to ple_window on every sched_in. */
		146	static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
		147	module_param(ple_window_shrink, int, S_IRUGO);
		148
		149	/* Default is to compute the maximum so we can never overflow. */
		150	static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
		151	static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
		152	module_param(ple_window_max, int, S_IRUGO);
		153
136	extern const ulong vmx_return;	154	extern const ulong vmx_return;
137		155
138	#define NR_AUTOLOAD_MSRS 8	156	#define NR_AUTOLOAD_MSRS 8
@@ -5683,12 +5701,81 @@ out:
5683	return ret;	5701	return ret;
5684	}	5702	}
5685		5703
		5704	static int __grow_ple_window(int val)
		5705	{
		5706	if (ple_window_grow < 1)
		5707	return ple_window;
		5708
		5709	val = min(val, ple_window_actual_max);
		5710
		5711	if (ple_window_grow < ple_window)
		5712	val *= ple_window_grow;
		5713	else
		5714	val += ple_window_grow;
		5715
		5716	return val;
		5717	}
		5718
		5719	static int __shrink_ple_window(int val, int modifier, int minimum)
		5720	{
		5721	if (modifier < 1)
		5722	return ple_window;
		5723
		5724	if (modifier < ple_window)
		5725	val /= modifier;
		5726	else
		5727	val -= modifier;
		5728
		5729	return max(val, minimum);
		5730	}
		5731
		5732	static void grow_ple_window(struct kvm_vcpu *vcpu)
		5733	{
		5734	struct vcpu_vmx *vmx = to_vmx(vcpu);
		5735	int old = vmx->ple_window;
		5736
		5737	vmx->ple_window = __grow_ple_window(old);
		5738
		5739	if (vmx->ple_window != old)
		5740	vmx->ple_window_dirty = true;
		5741	}
		5742
		5743	static void shrink_ple_window(struct kvm_vcpu *vcpu)
		5744	{
		5745	struct vcpu_vmx *vmx = to_vmx(vcpu);
		5746	int old = vmx->ple_window;
		5747
		5748	vmx->ple_window = __shrink_ple_window(old,
		5749	ple_window_shrink, ple_window);
		5750
		5751	if (vmx->ple_window != old)
		5752	vmx->ple_window_dirty = true;
		5753	}
		5754
		5755	/*
		5756	* ple_window_actual_max is computed to be one grow_ple_window() below
		5757	* ple_window_max. (See __grow_ple_window for the reason.)
		5758	* This prevents overflows, because ple_window_max is int.
		5759	* ple_window_max effectively rounded down to a multiple of ple_window_grow in
		5760	* this process.
		5761	* ple_window_max is also prevented from setting vmx->ple_window < ple_window.
		5762	*/
		5763	static void update_ple_window_actual_max(void)
		5764	{
		5765	ple_window_actual_max =
		5766	__shrink_ple_window(max(ple_window_max, ple_window),
		5767	ple_window_grow, INT_MIN);
		5768	}
		5769
5686	/*	5770	/*
5687	* Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE	5771	* Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
5688	* exiting, so only get here on cpu with PAUSE-Loop-Exiting.	5772	* exiting, so only get here on cpu with PAUSE-Loop-Exiting.
5689	*/	5773	*/
5690	static int handle_pause(struct kvm_vcpu *vcpu)	5774	static int handle_pause(struct kvm_vcpu *vcpu)
5691	{	5775	{
		5776	if (ple_gap)
		5777	grow_ple_window(vcpu);
		5778
5692	skip_emulated_instruction(vcpu);	5779	skip_emulated_instruction(vcpu);
5693	kvm_vcpu_on_spin(vcpu);	5780	kvm_vcpu_on_spin(vcpu);
5694		5781
@@ -8860,6 +8947,8 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
8860		8947
8861	void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)	8948	void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
8862	{	8949	{
		8950	if (ple_gap)
		8951	shrink_ple_window(vcpu);
8863	}	8952	}
8864		8953
8865	static struct kvm_x86_ops vmx_x86_ops = {	8954	static struct kvm_x86_ops vmx_x86_ops = {
@@ -9082,6 +9171,8 @@ static int __init vmx_init(void)
9082	} else	9171	} else
9083	kvm_disable_tdp();	9172	kvm_disable_tdp();
9084		9173
		9174	update_ple_window_actual_max();
		9175
9085	return 0;	9176	return 0;
9086		9177
9087	out7:	9178	out7: